MatteoScript committed
Commit c53513a · Parent: 7487ea3

Update main.py

Files changed (1): main.py +38 -1
main.py CHANGED
@@ -1,7 +1,44 @@
 from fastapi import FastAPI
+from huggingface_hub import InferenceClient
 
 app = FastAPI()
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+
+def format_prompt(message, history):
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
 
 @app.get("/")
 def read_root():
-    return {"Hello": "World!"}
+    user_input = "Come Stai?"  # Pass the desired input from here
+    history = []  # Define the conversation history here if needed
+    generated_response = list(generate(user_input, history))[-1]  # Drain the stream; the last yield holds the full text
+    return {"response": generated_response}  # Return the generated response as JSON
+
+def generate(prompt, history, temperature=0.2, max_new_tokens=30000, top_p=0.95, repetition_penalty=1.0):
+    temperature = float(temperature)
+    if temperature < 1e-2:
+        temperature = 1e-2
+    top_p = float(top_p)
+
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,
+    )
+    formatted_prompt = format_prompt(prompt, history)
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+
+    for response in stream:
+        output += response.token.text
+        yield output
+    return output
+
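
For context (not part of the commit): format_prompt assembles the running conversation into the [INST] … [/INST] instruct template that Mixtral-8x7B-Instruct expects. A minimal sketch of what it produces, assuming the file above is importable as `main` and using a made-up one-turn history:

from main import format_prompt

history = [("Ciao", "Ciao! Come posso aiutarti?")]  # hypothetical prior turn
print(format_prompt("Come Stai?", history))
# <s>[INST] Ciao [/INST] Ciao! Come posso aiutarti?</s> [INST] Come Stai? [/INST]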
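
And a hedged sketch of exercising the new endpoint locally. The uvicorn command, the port, and the use of requests are assumptions, not part of the commit; the call only succeeds if the Hugging Face Inference API accepts the request (a token, e.g. via the HF_TOKEN environment variable, may be required):

# Start the app first (assumed command):
#   uvicorn main:app --port 8000
import requests  # assumed installed; any HTTP client works

resp = requests.get("http://localhost:8000/")  # triggers read_root, which calls generate()
print(resp.json())  # e.g. {"response": "<text generated by Mixtral>"}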