Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -63,7 +63,7 @@ def st_capture(output_func: Callable[[str], None]) -> Generator:
|
|
| 63 |
stdout.write = new_write # type: ignore
|
| 64 |
yield
|
| 65 |
|
| 66 |
-
async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=
|
| 67 |
# First construct messages
|
| 68 |
messages = []
|
| 69 |
if system_prompt is not None:
|
|
@@ -94,10 +94,10 @@ async def run_samba_api_inference(query, system_prompt = None, ignore_context=Fa
|
|
| 94 |
post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
|
| 95 |
post_response.raise_for_status()
|
| 96 |
except requests.exceptions.HTTPError as e:
|
| 97 |
-
if post_response.status_code in {401, 503
|
| 98 |
st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
|
| 99 |
return ""
|
| 100 |
-
if post_response.status_code in {429}:
|
| 101 |
st.info("Rate limit hit because of all the pipelined queries, wait one second...")
|
| 102 |
await asyncio.sleep(num_seconds_to_sleep)
|
| 103 |
return await run_samba_api_inference(query) # Retry the request
|
|
|
|
| 63 |
stdout.write = new_write # type: ignore
|
| 64 |
yield
|
| 65 |
|
| 66 |
+
async def run_samba_api_inference(query, system_prompt = None, ignore_context=False, max_tokens_to_generate=None, num_seconds_to_sleep=5):
|
| 67 |
# First construct messages
|
| 68 |
messages = []
|
| 69 |
if system_prompt is not None:
|
|
|
|
| 94 |
post_response = await asyncio.get_event_loop().run_in_executor(None, lambda: requests.post(config.get("url"), json=payload, headers=headers, stream=True))
|
| 95 |
post_response.raise_for_status()
|
| 96 |
except requests.exceptions.HTTPError as e:
|
| 97 |
+
if post_response.status_code in {401, 503}:
|
| 98 |
st.info(f"Attempt failed due to rate limit or gate timeout. Status code: {post_response.status_code}. Trying again in {num_seconds_to_sleep} seconds...")
|
| 99 |
return ""
|
| 100 |
+
if post_response.status_code in {429, 504}:
|
| 101 |
st.info("Rate limit hit because of all the pipelined queries, wait one second...")
|
| 102 |
await asyncio.sleep(num_seconds_to_sleep)
|
| 103 |
return await run_samba_api_inference(query) # Retry the request
|