Thiloid committed
Commit 1c581ef · verified · 1 parent: d6d6731

Update run.py

Files changed (1)
  1. run.py +68 -70
run.py CHANGED
@@ -1,12 +1,11 @@
- import chromadb
  import os
  import gradio as gr
  import json
  from huggingface_hub import InferenceClient
  import gspread
- from oauth2client.service_account import ServiceAccountCredentials
- from datetime import datetime
  from google.oauth2 import service_account

  # Google Sheets setup
  scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
@@ -55,76 +54,50 @@ json_data={
  }
  creds = service_account.Credentials.from_service_account_info(json_data, scopes=scope)

- #creds = ServiceAccountCredentials.from_json_keyfile_name('/home/user/app/chromaold/nestolechatbot-5fe2aa26cb52.json', scope)
  client = gspread.authorize(creds)
  sheet = client.open("nestolechatbot").sheet1 # Open the sheet

- def save_to_sheet(date,name, message):
      # Write user input to the Google Sheet
-     sheet.append_row([date,name, message])
      return f"Thanks {name}, your message has been saved!"

-
  path='/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
- if(os.path.exists(path)==False): path="/home/user/app/chromaTS"

  print(path)
- #path='chromaTS'
- #settings = Settings(persist_directory=storage_path)
- #client = chromadb.Client(settings=settings)
  client = chromadb.PersistentClient(path=path)
  print(client.heartbeat())
  print(client.get_version())
  print(client.list_collections())
  from chromadb.utils import embedding_functions
  default_ef = embedding_functions.DefaultEmbeddingFunction()
- sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")#"VAGOsolutions/SauerkrautLM-Mixtral-8x7B-Instruct")
- #instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-large", device="cuda")
- #print(str(client.list_collections()))
- collection = client.get_collection(name="chromaTS", embedding_function=sentence_transformer_ef)

- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


- #def format_prompt(message, history):
- #print("HISTORY")
- #print(history)
- #prompt = "" #"<s>"
- #c=1
- #for user_prompt, bot_response in history:
- # if c<2:
- # prompt += f"[INST] {user_prompt} [/INST]"
- # prompt += f" {bot_response}</s> "
- # c=c+1
- #prompt += f"[INST] {message} [/INST]"
- #print("Final P")
- #print(prompt)
- #return prompt

  def format_prompt(message, history):
      print("HISTORY")
      print(history)
-
-     # Initialize prompt with an empty string
      prompt = ""
-
-     # Check if history is not empty
      if history:
-         # Extract the most recent user prompt and bot response
          user_prompt, bot_response = history[-1]
          prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
-
-     # Add the new message to the prompt
      prompt += f"[INST] {message} [/INST]"
-
      print("Final P")
      print(prompt)
-
      return prompt

- def response(
-     prompt, history,temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
- ):
      temperature = float(temperature)
      if temperature < 1e-2: temperature = 1e-2
      top_p = float(top_p)
@@ -136,42 +109,67 @@ def response(
          do_sample=True,
          seed=42,
      )
-     search_prompt = format_prompt(prompt,history)
-     results=collection.query(
-         query_texts=[search_prompt],
-         n_results=60,
-         #where={"source": "google-docs"}
-         #where_document={"$contains":"search_string"}
      )
-     #print("REsults")
-     #print(results)
-     #print("_____")
-     dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in results['distances'][0]]
-
-     #sources=["source: "+s["source"]+")</small>" for s in results['metadatas'][0]]
-     results=results['documents'][0]
-     #print("TEst")
-     #print(results)
-     #print("_____")
-     combination = zip(results,dists)
      combination = [' '.join(triplets) for triplets in combination]
-     #print(str(prompt)+"\n\n"+str(combination))
-     if(len(results)>1):
-         addon="Bitte berücksichtige bei deiner Antwort ausschießlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
-         system="Du bist ein deutschsprachiges KI-basiertes Studienberater Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt."+addon+"\n\nUser-Anliegen:"
-         formatted_prompt = format_prompt(system+"\n"+prompt,history)
-         stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
      output = ""
      for response in stream:
          output += response.token.text
          yield output
-     #output=output+"\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>"+ "".join(["<li>" + s + "</li>" for s in combination])+"</ul></details>"
-     # Get current date and time
      now = str(datetime.now())
-     save_to_sheet(now,prompt, output)
      yield output

- gr.ChatInterface(response, chatbot=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin Chätti ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]],render_markdown=True),title="German Studyhelper Chätti").queue().launch(share=True) #False, server_name="0.0.0.0", server_port=7864)
- print("Interface up and running!")

@@ -1,12 +1,11 @@
  import os
  import gradio as gr
  import json
  from huggingface_hub import InferenceClient
  import gspread
  from google.oauth2 import service_account
+ from datetime import datetime
+ import chromadb

  # Google Sheets setup
  scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
@@ -55,76 +54,50 @@ json_data={
  }
  creds = service_account.Credentials.from_service_account_info(json_data, scopes=scope)

  client = gspread.authorize(creds)
  sheet = client.open("nestolechatbot").sheet1 # Open the sheet

+ def save_to_sheet(date, name, message):
      # Write user input to the Google Sheet
+     sheet.append_row([date, name, message])
      return f"Thanks {name}, your message has been saved!"

  path='/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
+ if not os.path.exists(path):
+     path = "/home/user/app/chromaTS"

  print(path)
  client = chromadb.PersistentClient(path=path)
  print(client.heartbeat())
  print(client.get_version())
  print(client.list_collections())
+
  from chromadb.utils import embedding_functions
  default_ef = embedding_functions.DefaultEmbeddingFunction()
+ sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer")

+ collection = client.get_collection(name="chromaTS", embedding_function=sentence_transformer_ef)

+ inference_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

+ # Global variable to store the URL
+ global_url = ""

  def format_prompt(message, history):
      print("HISTORY")
      print(history)
      prompt = ""
      if history:
          user_prompt, bot_response = history[-1]
          prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
      prompt += f"[INST] {message} [/INST]"
      print("Final P")
      print(prompt)
      return prompt

+ def response(prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0):
+     global global_url
+     print(f"Working with URL: {global_url}") # You can use the URL here
      temperature = float(temperature)
      if temperature < 1e-2: temperature = 1e-2
      top_p = float(top_p)
@@ -136,42 +109,67 @@ def response(
          do_sample=True,
          seed=42,
      )
+     search_prompt = format_prompt(prompt, history)
+     results = collection.query(
+         query_texts=[search_prompt],
+         n_results=60,
      )
+     dists = ["<br><small>(relevance: " + str(round((1-d)*100)/100) + ";" for d in results['distances'][0]]
+     results = results['documents'][0]
+     combination = zip(results, dists)
      combination = [' '.join(triplets) for triplets in combination]
+     if len(results) > 1:
+         addon = "Bitte berücksichtige bei deiner Antwort ausschießlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n" + "\n".join(results)
+         system = "Du bist ein deutschsprachiges KI-basiertes Studienberater Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt." + addon + "\n\nUser-Anliegen:"
+         formatted_prompt = format_prompt(system + "\n" + prompt, history)
+         stream = inference_client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
      output = ""
      for response in stream:
          output += response.token.text
          yield output
      now = str(datetime.now())
+     save_to_sheet(now, prompt, output)
      yield output

+ def js_code():
+     return """
+     <script>
+     function getUrl() {
+         const url = window.location.href;
+         const xhr = new XMLHttpRequest();
+         xhr.open("POST", "/submit_url", true);
+         xhr.setRequestHeader("Content-Type", "application/json");
+         xhr.onreadystatechange = function() {
+             if (xhr.readyState === 4 && xhr.status === 200) {
+                 console.log("URL submitted successfully");
+             }
+         };
+         xhr.send(JSON.stringify({ url: url }));
+     }
+     window.onload = getUrl;
+     </script>
+     """
+
+ def submit_url(url: str):
+     global global_url
+     global_url = url # Save the URL in the global variable
+     print(f"Received URL: {url}")
+     return url

+ iface = gr.Interface(
+     fn=response,
+     inputs=gr.Chatbot(value=[[None,"Herzlich willkommen! Ich bin Chätti ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]], render_markdown=True),
+     title="German Studyhelper Chätti"
+ )

+ # Add a route to handle the URL submission
+ iface.launch(share=True, js=js_code())
+
+ @gr.routes.post("/submit_url")
+ async def process_url(request):
+     data = await request.json()
+     url = data.get("url", "")
+     submit_url(url)
+     return {"status": "success"}
+
+ print("Interface up and running!")