File size: 7,153 Bytes
c02b4bf
 
 
 
 
6113980
 
570c651
b26b0a3
c02b4bf
6113980
 
07026cb
 
 
 
 
 
 
 
 
 
 
 
 
 
2742bc2
02412d9
 
6113980
 
 
 
 
 
 
 
 
d3ae86a
 
c02b4bf
d3ae86a
c02b4bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7502cc2
c02b4bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cfc7235
c02b4bf
 
 
 
 
 
 
 
 
 
a126020
 
 
c02b4bf
 
 
 
 
 
 
 
 
 
 
 
 
570c651
4dfdae4
570c651
c02b4bf
 
6c4b824
c02b4bf
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import chromadb
import os
import gradio as gr
import json
from huggingface_hub import InferenceClient
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from datetime import datetime
from google.oauth2 import service_account

# Google Sheets setup
# OAuth scopes requested for the service account.
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]

# Identifier of the service-account key, supplied through the environment.
key1 = os.getenv("key1")

# SECURITY: the PEM private key used to be committed in this file as a string
# literal. Any key that has been published that way must be considered
# compromised and rotated in the Google Cloud console. The key is now read
# from the GOOGLE_PRIVATE_KEY environment variable / secret instead.
private_key = os.getenv("GOOGLE_PRIVATE_KEY")
if not private_key:
    raise RuntimeError(
        "Environment variable GOOGLE_PRIVATE_KEY is not set; it must contain "
        "the service-account private key in PEM format."
    )
# Secrets stores frequently keep the PEM on a single line with literal "\n"
# sequences; turn those back into real newlines so the key parses.
private_key = private_key.replace("\\n", "\n")

json_data = {
    "type": "service_account",
    "project_id": "nestolechatbot",
    "private_key_id": key1,
    "private_key": private_key,
    # Matches the account encoded in client_x509_cert_url below.
    "client_email": "nestoleservice@nestolechatbot.iam.gserviceaccount.com",
    "client_id": "107457262210035412036",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/nestoleservice%40nestolechatbot.iam.gserviceaccount.com",
    "universe_domain": "googleapis.com",
}
creds = service_account.Credentials.from_service_account_info(json_data, scopes=scope)

# NOTE: the name `client` is rebound further down in this file; `sheet` keeps
# its own reference to the worksheet, so logging keeps working afterwards.
client = gspread.authorize(creds)
sheet = client.open("nestolechatbot").sheet1  # first worksheet of the spreadsheet

def save_to_sheet(date, name, message):
    """Append a ``[date, name, message]`` row to the module-level worksheet.

    Args:
        date: Value written to the first column.
        name: Value written to the second column; also echoed in the reply.
        message: Value written to the third column.

    Returns:
        A confirmation string addressed to ``name``.
    """
    row = [date, name, message]
    sheet.append_row(row)
    confirmation = f"Thanks {name}, your message has been saved!"
    return confirmation

    
# Pick the directory of the persisted Chroma store: use the first path when it
# exists on this machine, otherwise fall back to /home/user/app/chromaTS.
path = '/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
if not os.path.exists(path):
    path = "/home/user/app/chromaTS"

print(path)

# Open the on-disk vector store and print a few diagnostics, in this order:
# heartbeat, version, available collections.
client = chromadb.PersistentClient(path=path)
for diagnostic in (client.heartbeat, client.get_version, client.list_collections):
    print(diagnostic())

from chromadb.utils import embedding_functions

# Instantiated but not referenced by the code visible in this file.
default_ef = embedding_functions.DefaultEmbeddingFunction()
# Embedding function passed to the collection below.
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
)
collection = client.get_collection(
    name="chromaTS",
    embedding_function=sentence_transformer_ef,
)

# From here on `client` refers to the Hugging Face inference client, not to
# the Chroma client created above; `collection` was obtained before the rebind.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


def format_prompt(message, history):
    """Wrap ``message`` in ``[INST] ... [/INST]`` instruction tags.

    Args:
        message: Text to place between the tags.
        history: Accepted for interface compatibility; not used, so earlier
            conversation turns are not included in the prompt.

    Returns:
        The tagged prompt string.
    """
    return "[INST] {} [/INST]".format(message)

def response(
    prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
):
    """Stream an answer to ``prompt``, grounded in documents from the collection.

    The function queries the module-level ``collection`` for passages related
    to ``prompt``, embeds them in a German system instruction, sends the result
    to the module-level inference ``client`` and yields the growing answer
    text token by token. After the stream ends, the prompt and the complete
    answer are appended to the Google Sheet via ``save_to_sheet``.

    Args:
        prompt: The user's message.
        history: Chat history; forwarded to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Generation length limit passed to the model.
        top_p: Nucleus-sampling parameter passed to the model.
        repetition_penalty: Repetition penalty passed to the model.

    Yields:
        The answer text accumulated so far; the final value is the full answer.
    """
    generate_kwargs = dict(
        temperature=max(float(temperature), 1e-2),  # clamp to a small positive value
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    query_result = collection.query(
        query_texts=[prompt],
        n_results=60,
    )
    # One query text was sent, so the passages for it are at index 0.
    results = query_result['documents'][0]
    print("TEst")
    print(results)
    print("_____")

    addon = ""
    # Include the retrieved passages whenever there is at least one; the
    # previous `len(results) > 1` check silently dropped a single hit.
    if results:
        addon = " Bitte berücksichtige bei deiner Antwort ausschießlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n" + "\n".join(results)
    system = "Du bist ein deutschsprachiges KI-basiertes Studienberater Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt." + addon + "\n\nUser-Anliegen:"

    formatted_prompt = format_prompt(system + "\n" + prompt, history)
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)

    output = ""
    # `chunk` rather than `response`, so the enclosing function name is not shadowed.
    for chunk in stream:
        output += chunk.token.text
        yield output

    # Log the exchange with the current local timestamp.
    now = str(datetime.now())
    save_to_sheet(now, prompt, output)
    # Final yield so a value is produced even when the stream was empty.
    yield output

# Chat widget pre-filled with a single bot-side welcome message.
welcome_chatbot = gr.Chatbot(
    value=[[None, "Herzlich willkommen! Ich bin Chätti ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]],
    render_markdown=True,
)
# Chat UI that calls `response` for every user message.
chat_interface = gr.ChatInterface(
    response,
    chatbot=welcome_chatbot,
    title="German Studyhelper Chätti",
)
# Alternative launch settings: share=False, server_name="0.0.0.0", server_port=7864
chat_interface.queue().launch(share=True)
print("Interface up and running!")