Skip to content

Commit 2822138

Browse files
authored
Temporarily remove calibration metrics from LLaMA and Llama 2 (stanford-crfm#1848)
1 parent 9eee36a commit 2822138

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

src/helm/benchmark/presentation/summarize.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,18 @@ class Summarizer:
235235
COST_REPORT_FIELDS: List[str] = ["num_prompt_tokens", "num_completion_tokens", "num_completions", "num_requests"]
236236

237237
# We need to hide stats for these model-metric combinations
238-
LOGPROBS_ISSUE_MODELS: Set[str] = {"anthropic/stanford-online-all-v4-s3"}
238+
LOGPROBS_ISSUE_MODELS: Set[str] = {
239+
"anthropic/stanford-online-all-v4-s3",
240+
# Together sometimes returns logprobs and sometimes does not.
241+
# TODO(#1847): Re-enable calibration metrics for these models after this is resolved.
242+
"meta/llama-7b",
243+
"meta/llama-13b",
244+
"meta/llama-30b",
245+
"meta/llama-65b",
246+
"meta/llama-2-7b",
247+
"meta/llama-2-13b",
248+
"meta/llama-2-70b",
249+
}
239250
LOGPROBS_ISSUE_METRICS: Set[str] = {
240251
# MSMARCO metrics
241252
"NDCG@10",

0 commit comments

Comments
 (0)