Update app.py
app.py CHANGED
@@ -34,7 +34,8 @@ app.add_middleware(
 
 # Initialize Hugging Face client
 hf_client = InferenceClient(
-    api_key=os.getenv("HF_TOKEN"),
+    model=os.getenv("MODEL_ID", "mistralai/Mistral-Nemo-Instruct-2407"),  # default model added to client
+    token=os.getenv("HF_TOKEN"),  # renamed api_key to token
     timeout=30
 )
 
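Pinning `model` and `token` on the client means every later call targets the same endpoint without repeating them. A minimal sketch of the pattern, assuming the same env vars as the diff (everything outside the `InferenceClient(...)` call is illustrative, not the Space's full app.py):

import os
from huggingface_hub import InferenceClient

# Client pinned to one model; calls below can omit model=
hf_client = InferenceClient(
    model=os.getenv("MODEL_ID", "mistralai/Mistral-Nemo-Instruct-2407"),
    token=os.getenv("HF_TOKEN"),
    timeout=30,
)

# Any task method now hits the pinned model by default
print(hf_client.text_generation("Hello", max_new_tokens=16))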
@@ -45,21 +46,19 @@ async def generate_stream(messages: List[Message], max_tokens: int, temperature:
     try:
         # Convert messages to the format expected by the API
         formatted_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
-
-        # Create the streaming completion
-        stream = hf_client.chat.completions.create(
-            model=MODEL_ID,
-            messages=formatted_messages,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            top_p=top_p,
-            stream=True
-        )
 
         # Stream the response chunks
-        for chunk in stream:
-            if chunk.choices[0].delta.content is not None:
-                yield chunk.choices[0].delta.content
+        for chunk in hf_client.text_generation(
+            prompt=formatted_messages,  # updated to text_generation
+            details=True,
+            max_new_tokens=max_tokens,  # renamed max_tokens to max_new_tokens
+            temperature=temperature,
+            top_p=top_p,
+            do_sample=True,
+            stream=True,
+        ):
+            if chunk.token.text is not None:
+                yield chunk.token.text
 
     except Exception as e:
         logger.error(f"Error in generate_stream: {e}", exc_info=True)
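A caveat on this hunk: `InferenceClient.text_generation` takes a string `prompt`, while `formatted_messages` is a list of role/content dicts that the removed `chat.completions.create` call consumed natively, so this call would likely fail validation at request time. A hedged sketch of one way to flatten the messages first (the role-prefix format and the `messages_to_prompt` helper are illustrative assumptions, not part of the Space):

def messages_to_prompt(messages: list[dict]) -> str:
    # Flatten chat messages into one prompt string (format is illustrative only)
    lines = [f"{m['role']}: {m['content']}" for m in messages]
    lines.append("assistant:")  # cue the model to respond
    return "\n".join(lines)

# With stream=True and details=True, text_generation yields chunks carrying token metadata
for chunk in hf_client.text_generation(
    prompt=messages_to_prompt(formatted_messages),
    details=True,
    max_new_tokens=256,
    stream=True,
):
    print(chunk.token.text, end="")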
@@ -71,13 +70,13 @@ async def chat_stream(input: ChatInput, request: Request):
     try:
         if not os.getenv("HF_TOKEN"):
             raise HTTPException(
-                status_code=500,
+                status_code=500,
                 detail="HF_TOKEN environment variable not set"
             )
 
         logger.info(f"Received chat request from {request.client.host}")
         logger.info(f"Number of messages: {len(input.messages)}")
-
+
         return StreamingResponse(
             generate_stream(
                 messages=input.messages,
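The `-`/`+` pairs above (`status_code=500,` and the blank line) appear to be whitespace-only changes. For readers following the endpoint shape: FastAPI streams the async generator straight to the client via `StreamingResponse`. A self-contained sketch of the same pattern, with a placeholder route and chunks rather than the Space's actual schema:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def token_stream():
    # Stand-in for generate_stream(); yields text chunks as they are produced
    for piece in ("Hello", ", ", "world"):
        yield piece

@app.post("/chat")
async def chat_stream():
    # text/plain media type discourages intermediaries from buffering the stream
    return StreamingResponse(token_stream(), media_type="text/plain")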