Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import openai
+from openai.error import InternalServerError  # Import the error class
 import torch
 from transformers import pipeline
 
@@ -49,7 +50,6 @@ client = OpenAI(
 
 
 def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
-    """Generates LLM response for given text with streaming support, handling GPU cold-start errors."""
     full_response = []
     try:
         chat_completion = client.chat.completions.create(
@@ -67,16 +67,14 @@ def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024
             frequency_penalty=None,
             presence_penalty=None
         )
-        # Collect streamed response chunks
         for chunk in chat_completion:
             if chunk.choices[0].delta.content:
                 full_response.append(chunk.choices[0].delta.content)
         return "".join(full_response)
-    except
-        #
+    except InternalServerError as e:
+        # This error is raised when the GPU is unavailable (e.g. scaling down to 0 after idle)
         return "The GPU is currently booting up. Please wait about 10 minutes and try again."
 
-
 generate_llm_response("Explain Deep Learning in Igbo")
 
 
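
A note on this change: `from openai.error import InternalServerError` resolves only in the pre-1.0 openai SDK, while the streaming call `client.chat.completions.create(...)` belongs to the 1.x client API, where the error classes are exported from the top-level `openai` package instead. Below is a minimal, self-contained sketch of the same cold-start handler written against the 1.x SDK; the `base_url`/`api_key` wiring is a placeholder assumption, since the Space's actual `client = OpenAI(...)` setup is truncated out of the hunk.

import os

from openai import OpenAI, InternalServerError  # 1.x SDK: error classes live at the top level

# Placeholder client setup; the real Space configures its own endpoint and key.
client = OpenAI(
    base_url=os.environ.get("ENDPOINT_URL", "http://localhost:8080/v1"),
    api_key=os.environ.get("API_KEY", "dummy"),
)

def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
    """Streams a chat completion, returning a friendly message while the GPU cold-starts."""
    full_response = []
    try:
        chat_completion = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": text}],
            stream=True,  # collect streamed chunks instead of waiting for the full reply
        )
        for chunk in chat_completion:
            if chunk.choices[0].delta.content:
                full_response.append(chunk.choices[0].delta.content)
        return "".join(full_response)
    except InternalServerError:
        # Raised while a scaled-to-zero GPU replica is still booting.
        return "The GPU is currently booting up. Please wait about 10 minutes and try again."

print(generate_llm_response("Explain Deep Learning in Igbo"))

Catching InternalServerError and returning a message, rather than retrying in a loop, keeps the UI responsive during the roughly ten-minute boot; a retry with backoff around the same call would be the alternative if blocking is acceptable.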