Robostar committed on
Commit
fec2d9d
·
verified ·
1 Parent(s): f433466

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -7
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from huggingface_hub import InferenceClient
@@ -11,9 +13,12 @@ app = FastAPI()
11
  #model_name = "serkanarslan/mistral-7b-mini-ft"
12
  # Choose a smaller model for free-tier
13
  #model_name = "microsoft/phi-2"
14
- model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # You can switch to Phi-2, OpenChat, etc.
 
15
 
16
- client = InferenceClient(model_name)
 
 
17
 
18
  # Define request format
19
  class ChatRequest(BaseModel):
@@ -21,11 +26,13 @@ class ChatRequest(BaseModel):
21
 
22
  @app.post("/chat")
23
  async def chat(request: ChatRequest):
24
- # Send message to Hugging Face Inference API
25
- response = client.text_generation(request.message, max_new_tokens=100)
26
- return {"response": response}
27
-
28
-
 
 
29
 
30
 
31
 
 
1
+
2
+
3
  from fastapi import FastAPI
4
  from pydantic import BaseModel
5
  from huggingface_hub import InferenceClient
 
13
  #model_name = "serkanarslan/mistral-7b-mini-ft"
14
  # Choose a smaller model for free-tier
15
  #model_name = "microsoft/phi-2"
16
+ #model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0" # You can switch to Phi-2, OpenChat, etc.
17
+
18
 
19
+ # Use the full Hugging Face Inference API URL
20
+ HF_API_URL = "https://api-inference.huggingface.co/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0"
21
+ client = InferenceClient(HF_API_URL)
22
 
23
  # Define request format
24
  class ChatRequest(BaseModel):
 
26
 
27
  @app.post("/chat")
28
  async def chat(request: ChatRequest):
29
+ # Corrected function call with `model` argument
30
+ response = client.text_generation(
31
+ request.message,
32
+ model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
33
+ max_new_tokens=100
34
+ )
35
+ return {"response": response} # ✅ Removed extra quote
36
 
37
 
38