Update app.py
app.py CHANGED
@@ -1,5 +1,6 @@
 import os
 import openai
+from openai.error import InternalServerError  # Import the error class
 import torch
 from transformers import pipeline
 
@@ -49,7 +50,6 @@ client = OpenAI(
 
 
 def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
-    """Generates LLM response for given text with streaming support, handling GPU cold-start errors."""
     full_response = []
     try:
         chat_completion = client.chat.completions.create(
@@ -67,16 +67,14 @@ def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024
             frequency_penalty=None,
             presence_penalty=None
         )
-        # Collect streamed response chunks
         for chunk in chat_completion:
             if chunk.choices[0].delta.content:
                 full_response.append(chunk.choices[0].delta.content)
         return "".join(full_response)
-    except
-        #
+    except InternalServerError as e:
+        # This error is raised when the GPU is unavailable (e.g. scaling down to 0 after idle)
         return "The GPU is currently booting up. Please wait about 10 minutes and try again."
 
-
 generate_llm_response("Explain Deep Learning in Igbo")
 
 
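
A note on this change: `from openai.error import InternalServerError` resolves only in the pre-1.0 openai SDK, while the streaming call `client.chat.completions.create(...)` belongs to the 1.x client API, where the error classes are exported from the top-level `openai` package instead. Below is a minimal, self-contained sketch of the same cold-start handler written against the 1.x SDK; the `base_url`/`api_key` wiring is a placeholder assumption, since the Space's actual `client = OpenAI(...)` setup is truncated out of the hunk.

import os

from openai import OpenAI, InternalServerError  # 1.x SDK: error classes live at the top level

# Placeholder client setup; the real Space configures its own endpoint and key.
client = OpenAI(
    base_url=os.environ.get("ENDPOINT_URL", "http://localhost:8080/v1"),
    api_key=os.environ.get("API_KEY", "dummy"),
)

def generate_llm_response(text, model_id="ccibeekeoc42/Llama3.1-8b-base-SFT-2024-11-09"):
    """Streams a chat completion, returning a friendly message while the GPU cold-starts."""
    full_response = []
    try:
        chat_completion = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": text}],
            stream=True,  # collect streamed chunks instead of waiting for the full reply
        )
        for chunk in chat_completion:
            if chunk.choices[0].delta.content:
                full_response.append(chunk.choices[0].delta.content)
        return "".join(full_response)
    except InternalServerError:
        # Raised while a scaled-to-zero GPU replica is still booting.
        return "The GPU is currently booting up. Please wait about 10 minutes and try again."

print(generate_llm_response("Explain Deep Learning in Igbo"))

Catching InternalServerError and returning a message, rather than retrying in a loop, keeps the UI responsive during the roughly ten-minute boot; a retry with backoff around the same call would be the alternative if blocking is acceptable.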