File size: 6,235 Bytes
c02b4bf
 
 
 
6113980
b26b0a3
1c581ef
 
c02b4bf
6113980
 
07026cb
f06025d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138851d
07026cb
 
 
 
138851d
a880370
07026cb
 
 
 
 
 
 
 
2742bc2
02412d9
6113980
 
 
1c581ef
6113980
1c581ef
6113980
 
d3ae86a
1c581ef
 
c02b4bf
d3ae86a
c02b4bf
 
 
 
1c581ef
c02b4bf
 
1c581ef
c02b4bf
1c581ef
c02b4bf
1c581ef
c02b4bf
1c581ef
 
b518cbb
7502cc2
b518cbb
 
 
 
 
 
 
 
 
 
c02b4bf
1c581ef
4d9ef07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfbc154
b56ec7e
c02b4bf
 
 
 
 
 
 
 
 
 
 
1c581ef
 
 
 
c02b4bf
1c581ef
 
 
c02b4bf
1c581ef
 
 
 
 
c02b4bf
 
 
 
4dfdae4
1c581ef
c02b4bf
4d9ef07
 
 
 
 
 
c02b4bf
4d9ef07
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import os
import gradio as gr
import json
from huggingface_hub import InferenceClient
import gspread
from google.oauth2 import service_account
from datetime import datetime
import chromadb

# Google Sheets setup
# OAuth scopes requested for the service account.
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]


def _require_env(name):
    """Return environment variable *name*, raising a clear error when unset."""
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# The private key body is stored one PEM line per environment variable
# (key2 ... key28) and stitched back together here.
# NOTE(review): key23 is not part of the assembled key (key22 is followed
# directly by key24) -- confirm against the deployed secrets before changing.
_PEM_LINE_VARS = [f"key{i}" for i in range(2, 29) if i != 23]

key1 = os.getenv("key1")
pkey = (
    "-----BEGIN PRIVATE KEY-----\n"
    + "\n".join(_require_env(var) for var in _PEM_LINE_VARS)
    + "\n-----END PRIVATE KEY-----\n"
)

# Service-account info in the layout expected by
# ``Credentials.from_service_account_info``.
json_data = {
    "type": "service_account",
    "project_id": "nestolechatbot",
    "private_key_id": key1,
    "private_key": pkey,
    # Same account that is URL-encoded at the end of client_x509_cert_url.
    "client_email": "nestoleservice@nestolechatbot.iam.gserviceaccount.com",
    "client_id": "107457262210035412036",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/nestoleservice%40nestolechatbot.iam.gserviceaccount.com",
    "universe_domain": "googleapis.com",
}
creds = service_account.Credentials.from_service_account_info(json_data, scopes=scope)

# ``client`` is rebound to the Chroma client further down in this file; only
# ``sheet`` is used afterwards for spreadsheet access.
client = gspread.authorize(creds)
sheet = client.open("nestolechatbot").sheet1  # Open the sheet

def save_to_sheet(date, name, message):
    """Append one ``[date, name, message]`` row to the Google Sheet.

    Returns a confirmation string addressed to *name*.
    """
    row = [date, name, message]
    sheet.append_row(row)
    confirmation = f"Thanks {name}, your message has been saved!"
    return confirmation

# Pick the Chroma store location: the first path if it exists on this
# machine, otherwise the fallback path.
_primary_store = '/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
_fallback_store = "/home/user/app/chromaTS"
path = _primary_store if os.path.exists(_primary_store) else _fallback_store

print(path)
client = chromadb.PersistentClient(path=path)

# Startup diagnostics, printed in a fixed order.
for _probe in ("heartbeat", "get_version", "list_collections"):
    print(getattr(client, _probe)())

from chromadb.utils import embedding_functions

default_ef = embedding_functions.DefaultEmbeddingFunction()
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer"
)

# The existing "chromaTS" collection is queried with the sentence-transformer
# embedding function defined above.
collection = client.get_collection(
    name="chromaTS",
    embedding_function=sentence_transformer_ef,
)

inference_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# Global variable to store the URL
global_url = ""

def format_prompt(message, history):
    """Build an ``[INST]``-tagged prompt from *message* and the latest turn.

    Only the most recent ``(user, bot)`` pair in *history* is included as
    context. A turn whose user side is ``None`` (for example a bot-only
    greeting) is skipped, so the literal text "None" is never placed in the
    prompt.

    Args:
        message: The new user message.
        history: Sequence of ``(user, bot)`` pairs; may be empty or ``None``.

    Returns:
        The prompt string ending in ``[INST] {message} [/INST]``.
    """
    print("HISTORY")
    print(history)
    parts = []
    if history:
        user_prompt, bot_response = history[-1]
        if user_prompt is not None:
            parts.append(f"[INST] {user_prompt} [/INST] {bot_response}</s> ")
    parts.append(f"[INST] {message} [/INST]")
    prompt = "".join(parts)
    print("Final P")
    print(prompt)
    return prompt

def response(prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95,
             repetition_penalty=1.0, request: "gr.Request" = None):
    """Stream an answer to *prompt*, grounded in documents from the collection.

    The prompt (plus the latest history turn) is used to query the Chroma
    collection; the returned documents are appended to the system instruction,
    the combined prompt is sent to the inference client, and the growing
    answer is yielded as tokens arrive. After streaming finishes the exchange
    is written to the Google Sheet.

    Args:
        prompt: The user's message.
        history: Previous ``(user, bot)`` turns as supplied by the chat UI.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling parameter.
        repetition_penalty: Repetition penalty passed to the generator.
        request: Optional request object. Gradio is expected to supply it when
            it recognises the annotation; when it is ``None`` the header
            logging is skipped.

    Yields:
        The accumulated answer text after each streamed token, and once more
        after the exchange has been saved.
    """
    # Request headers are logged only when a request object is available.
    if request is not None:
        print(request.headers)

    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Retrieve candidate passages for the current question.
    search_prompt = format_prompt(prompt, history)
    hits = collection.query(
        query_texts=[search_prompt],
        n_results=60,
    )
    documents = hits['documents'][0]

    # Default to no extra context so the system prompt can always be built,
    # even when the query returns nothing.
    addon = ""
    if documents:
        addon = "Bitte berücksichtige bei deiner Antwort ausschießlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n" + "\n".join(documents)
    system = "Du bist ein deutschsprachiges KI-basiertes Studienberater Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt." + addon + "\n\nUser-Anliegen:"
    formatted_prompt = format_prompt(system + "\n" + prompt, history)

    stream = inference_client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for chunk in stream:
        output += chunk.token.text
        yield output

    # Persist the finished exchange, then emit the final text once more.
    now = str(datetime.now())
    save_to_sheet(now, prompt, output)
    yield output
    
# Seed the chat window with a bot-only greeting (no user message).
_greeting = [[None, "Herzlich willkommen! Ich bin Chätti ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]]
_chat_window = gr.Chatbot(value=_greeting, render_markdown=True)

# Build the chat UI around ``response``, enable queuing and launch it.
_interface = gr.ChatInterface(
    response,
    chatbot=_chat_window,
    title="German Studyhelper Chätti",
)
_interface.queue().launch(share=True)

print("Interface up and running!")