Pg-Research

Build error

zolicsaki committed on Sep 20, 2024

Commit

96c79d5

verified ·

1 Parent(s): c18ceb6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -63,7 +63,7 @@ def st_capture(output_func: Callable[[str], None]) -> Generator:
         stdout.write = new_write  # type: ignore
         yield
-async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=2):
     # First construct messages
     messages = []
     if system_prompt is not None:
@@ -94,10 +94,10 @@ async def run_samba_api_inference(query, system_prompt = None, ignore_context=Fa
         post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
         post_response.raise_for_status()
     except requests.exceptions.HTTPError as e:
-        if post_response.status_code in {401, 503, 504}:
             st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
             return ""
-        if post_response.status_code in {429}:
             st.info("Rate limit hit because of all the pipelined queries, wait one second...")
             await asyncio.sleep(num_seconds_to_sleep)
             return await run_samba_api_inference(query)  # Retry the request

         stdout.write = new_write  # type: ignore
         yield
+async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=5):
     # First construct messages
     messages = []
     if system_prompt is not None:
         post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
         post_response.raise_for_status()
     except requests.exceptions.HTTPError as e:
+        if post_response.status_code in {401, 503}:
             st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
             return ""
+        if post_response.status_code in {429, 504}:
             st.info("Rate limit hit because of all the pipelined queries, wait one second...")
             await asyncio.sleep(num_seconds_to_sleep)
             return await run_samba_api_inference(query)  # Retry the request