Commit 7fe63cd

Addition of Error Flags: retriable and fatal (stanford-crfm#1533)
1 parent 356a6b2 commit 7fe63cd

File tree: 5 files changed, +68 -17 lines


src/helm/benchmark/executor.py (+6 -2)
@@ -3,7 +3,7 @@
 
 from helm.common.general import parallel_map
 from helm.common.hierarchical_logger import htrack, hlog
-from helm.common.request import RequestResult
+from helm.common.request import RequestResult, Sequence
 from helm.common.authentication import Authentication
 from helm.proxy.services.remote_service import RemoteService
 from helm.proxy.services.server_service import ServerService
@@ -85,5 +85,9 @@ def execute(self, scenario_state: ScenarioState) -> ScenarioState:
     def process(self, state: RequestState) -> RequestState:
         result: RequestResult = self.service.make_request(self.execution_spec.auth, state.request)
         if not result.success:
-            raise ExecutorError(f"{str(result.error)} Request: {state.request}")
+            if result.error_flags and not result.error_flags.is_fatal:
+                hlog(f"WARNING: Non-fatal error treated as empty completion: {result.error}")
+                result.completions = [Sequence(text="", logprob=0, tokens=[])]
+            else:
+                raise ExecutorError(f"{str(result.error)} Request: {state.request}")
         return replace(state, result=result)
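With the flags in place, a failed request marked non-fatal no longer aborts the run: the executor logs a warning, substitutes an empty completion, and moves on; anything else still raises. A minimal sketch of that decision, reusing the commit's `RequestResult` and `Sequence` types (the standalone `tolerate_or_raise` helper is hypothetical, and `RuntimeError` stands in for `ExecutorError`):

from helm.common.request import RequestResult, Sequence

def tolerate_or_raise(result: RequestResult) -> RequestResult:
    # Hypothetical helper mirroring Executor.process above.
    if not result.success:
        if result.error_flags and not result.error_flags.is_fatal:
            # Non-fatal (e.g. a prompt the provider refuses): score as empty output.
            result.completions = [Sequence(text="", logprob=0, tokens=[])]
        else:
            # Unflagged or fatal errors still abort the run.
            raise RuntimeError(f"{result.error}")
    return result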

src/helm/common/request.py (+16)
@@ -129,6 +129,19 @@ def render_lines(self) -> List[str]:
 
 
 @dataclass(frozen=True)
+class ErrorFlags:
+    """Describes how to treat errors in the request."""
+
+    is_retriable: Optional[bool] = None
+    """Whether the request is retriable or whether the error is permanent.
+    If None, the error is treated as retriable."""
+
+    is_fatal: Optional[bool] = None
+    """Whether the error is fatal, i.e. the run should be discarded.
+    If None, the error is treated as fatal."""
+
+
+@dataclass(frozen=False)
 class RequestResult:
     """What comes back due to a `Request`."""
 
@@ -155,6 +168,9 @@ class RequestResult:
     error: Optional[str] = None
     """If `success` is false, what was the error?"""
 
+    error_flags: Optional[ErrorFlags] = None
+    """Describes how to treat errors in the request."""
+
     batch_size: Optional[int] = None
     """Batch size (`TogetherClient` only)"""
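
Both flags default to None so that existing clients, which never set them, keep the old semantics: an unflagged error is retried and, if it never succeeds, treated as fatal. Note that `RequestResult` also changes from `frozen=True` to `frozen=False`, which is what lets the executor above overwrite `completions` in place. A quick sketch of the two cases, with made-up error messages:

from helm.common.request import ErrorFlags, RequestResult

# Permanent but tolerable: skip this one request, keep the run, don't retry.
moderation_failure = RequestResult(
    success=False,
    cached=False,
    error="content flagged by the provider",  # illustrative message
    completions=[],
    embedding=[],
    error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
)

# No flags set: the retry layer retries as before, and the executor treats
# a lasting failure as fatal, exactly as it did before this commit.
transient_failure = RequestResult(
    success=False, cached=False, error="timeout", completions=[], embedding=[]
)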

src/helm/proxy/clients/anthropic_client.py (+24 -11)
@@ -9,7 +9,14 @@
 
 from helm.common.cache import Cache, CacheConfig
 from helm.common.hierarchical_logger import htrack_block, hlog
-from helm.common.request import EMBEDDING_UNAVAILABLE_REQUEST_RESULT, Request, RequestResult, Sequence, Token
+from helm.common.request import (
+    EMBEDDING_UNAVAILABLE_REQUEST_RESULT,
+    Request,
+    RequestResult,
+    Sequence,
+    Token,
+    ErrorFlags,
+)
 from helm.common.tokenization_request import (
     TokenizationRequest,
     TokenizationRequestResult,
@@ -22,14 +29,6 @@
 from dataclasses import asdict
 
 
-class AnthropicPromptTooLongError(Exception):
-    pass
-
-
-class AnthropicPromptPlusMaxTokensTooLongError(Exception):
-    pass
-
-
 class AnthropicClient(Client):
     """
     Client for the Anthropic models (https://arxiv.org/abs/2204.05862).
@@ -138,9 +137,23 @@ def do_it():
             response, cached = self.cache.get(cache_key, wrap_request_time(do_it))
         except Exception as error:
             if "Prompt must contain anthropic.AI_PROMPT" in str(error):
-                raise AnthropicPromptTooLongError(f"Prompt too long: {request.prompt}")
+                return RequestResult(
+                    success=False,
+                    cached=False,
+                    error=response["error"],
+                    completions=[],
+                    embedding=[],
+                    error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+                )
             if "exceeds max (" in str(error):
-                raise AnthropicPromptPlusMaxTokensTooLongError(f"Prompt + max_tokens too long: {request.prompt}")
+                return RequestResult(
+                    success=False,
+                    cached=False,
+                    error=response["error"],
+                    completions=[],
+                    embedding=[],
+                    error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+                )
             return RequestResult(success=False, cached=False, error=str(error), completions=[], embedding=[])
 
         # Post process the completion.
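
Both prompt-length failures are deterministic: retrying the identical request can never succeed, and dropping one oversized instance should not discard the whole run, hence `ErrorFlags(is_retriable=False, is_fatal=False)` replaces the two removed exception classes. The same match-on-message pattern as a hedged standalone sketch (the helper name is hypothetical; the matched substrings are the ones used above):

from typing import Optional
from helm.common.request import ErrorFlags

# Provider messages that signal a permanent, per-request failure.
_PERMANENT_MARKERS = ("Prompt must contain anthropic.AI_PROMPT", "exceeds max (")

def classify_error(error: Exception) -> Optional[ErrorFlags]:
    # None means "no opinion": the downstream defaults (retriable, and
    # ultimately fatal) apply to any error we don't recognize.
    if any(marker in str(error) for marker in _PERMANENT_MARKERS):
        return ErrorFlags(is_retriable=False, is_fatal=False)
    return None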

src/helm/proxy/clients/palmyra_client.py (+16 -3)
@@ -4,7 +4,7 @@
 
 from helm.common.cache import Cache, CacheConfig
 from helm.common.hierarchical_logger import hlog
-from helm.common.request import Request, RequestResult, Sequence, Token
+from helm.common.request import Request, RequestResult, Sequence, Token, ErrorFlags
 from helm.common.tokenization_request import (
     DecodeRequest,
     DecodeRequestResult,
@@ -75,8 +75,6 @@ def make_request(self, request: Request) -> RequestResult:
 
         def do_it():
            result = self._send_request(model_name, raw_request)
-            if "choices" not in result:
-                raise ValueError(f"Invalid response: {result}")
            return result
 
         # We need to include the engine's name to differentiate among requests made for different model
@@ -99,6 +97,21 @@ def do_it():
             error: str = f"PalmyraClient error: {e}"
             return RequestResult(success=False, cached=False, error=error, completions=[], embedding=[])
 
+        if "choices" not in response:
+            if "errors" in response and response["errors"][0]["key"] == "fail.content.moderation.failed":
+                return RequestResult(
+                    success=False,
+                    cached=False,
+                    error=response["errors"][0]["description"],
+                    completions=[],
+                    embedding=[],
+                    error_flags=ErrorFlags(is_retriable=False, is_fatal=False),
+                    request_time=response["request_time"],
+                    request_datetime=response["request_datetime"],
+                )
+            else:
+                raise ValueError(f"Invalid response: {response}")
+
         response_text: str = response["choices"][0]["text"]
 
         # The Writer API doesn't support echo. If `echo_prompt` is true, combine the prompt and completion.
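
Moving the `"choices"` check out of `do_it` matters: `do_it` runs under the cache-and-retry machinery, where raising would just trigger more retries, while inspecting the (possibly cached) response afterwards lets a moderation refusal come back as a non-retriable, non-fatal result that keeps its timing metadata. A sketch of the response shape this branch expects, with all field values invented for illustration (in HELM, `wrap_request_time` attaches `request_time` and `request_datetime` to the response):

# Illustrative Writer/Palmyra-style moderation-error payload.
response = {
    "errors": [
        {
            "key": "fail.content.moderation.failed",
            "description": "The prompt was rejected by content moderation.",
        }
    ],
    "request_time": 0.42,            # seconds, added by wrap_request_time
    "request_datetime": 1690000000,  # unix timestamp, added by wrap_request_time
}

is_moderation_refusal = (
    "choices" not in response
    and "errors" in response
    and response["errors"][0]["key"] == "fail.content.moderation.failed"
)
assert is_moderation_refusal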

src/helm/proxy/retry.py (+6 -1)
@@ -48,7 +48,12 @@ def retry_if_request_failed(result: Union[RequestResult, TokenizationRequestResult]) -> bool:
     """Fails if `success` of `RequestResult` or `TokenizationRequestResult` is false."""
     if not result.success:
         hlog(result.error)
-    return not result.success
+    retry_if_fail: bool = True
+    if isinstance(result, RequestResult):
+        retry_if_fail = (
+            result.error_flags is None or result.error_flags.is_retriable is None or result.error_flags.is_retriable
+        )
+    return not result.success and retry_if_fail
 
 
 retry_request: Callable = get_retry_decorator(
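
The predicate keeps its name but now distinguishes flagged failures: any of `error_flags is None`, `is_retriable is None`, or `is_retriable=True` preserves the old retry-on-failure behavior, and only an explicit `is_retriable=False` suppresses the retry. Equivalent logic for the `RequestResult` case, as a standalone sketch (the `should_retry` name is hypothetical):

from typing import Optional
from helm.common.request import ErrorFlags, RequestResult

def should_retry(result: RequestResult) -> bool:
    # Successful requests are never retried.
    if result.success:
        return False
    flags: Optional[ErrorFlags] = result.error_flags
    # Absent flags (or is_retriable=None) fall back to the old always-retry
    # path; only an explicit is_retriable=False stops the retry loop.
    return flags is None or flags.is_retriable is None or bool(flags.is_retriable)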
