import sqlite3

import lancedb
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry
from lancedb.rerankers import ColbertReranker

from send_openai import get_coherent_answer

# Initialise the embedding model once via LanceDB's sentence-transformers registry.
# The same model embeds documents at ingestion time and queries at search time,
# so a separate SentenceTransformer instance is not needed.
model_registry = get_registry().get("sentence-transformers")
model = model_registry.create(name="BAAI/bge-small-en-v1.5")

def create_interactions(db_path="rag.db"):
    """Create the interactions log table in SQLite if it does not already exist."""
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("""CREATE TABLE IF NOT EXISTS interactions (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            question TEXT,
            chunks TEXT,
            coherent_answer TEXT,
            feedback TEXT,
            additional_feedback TEXT
        )""")
    conn.commit()
    conn.close()
    
create_interactions()

# LanceDB schema for stored chunks: `text` is embedded automatically by the
# registered model, and Vector(384) matches bge-small-en-v1.5's embedding size.
class Document(LanceModel):
    text: str = model.SourceField()
    vector: Vector(384) = model.VectorField()
    filename: str
    username: str



# Connect to the LanceDB and create a table
db = lancedb.connect(".my_db")
tbl = db.create_table("my_table", schema=Document, exist_ok=True)

# Function to read chunks from the SQLite database
def read_chunks_from_db(db_path):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("SELECT talkname, filename, chunk, username FROM text_chunks")
    chunks = cursor.fetchall()
    conn.close()
    return chunks

# # Read chunks from the database
# db_path = 'rag.db'  # Replace with your actual database path
# chunks = read_chunks_from_db(db_path)

# # Prepare documents for embedding and storing in LanceDB
# docs = []
# for talkname, filename, chunk, username in chunks:
#     docs.append({
#         "text": chunk,
#         "filename": talkname,
#         "username": username
#     })

# # Add documents to the LanceDB table
# tbl.add(docs)
# # Generate the full-text (tf-idf) search index
# tbl.create_fts_index("text")

# Initialise a reranker
reranker = ColbertReranker()

# Function to log the interaction to the SQLite database
def log_interaction(question, chunks, coherent_answer, feedback=None, additional_feedback=None, db_path='rag.db'):
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute('''
        INSERT INTO interactions (question, chunks, coherent_answer, feedback, additional_feedback)
        VALUES (?, ?, ?, ?, ?)
    ''', (question, "\n\n".join(chunks), coherent_answer, feedback, additional_feedback))
    conn.commit()
    conn.close()


# Function to answer a question: retrieve, rerank, then generate a coherent answer
def search_question(question):
    # Hybrid search (full-text + vector), take 10 first-pass results, then rerank
    results = (tbl.search(question, query_type="hybrid")
               .limit(10)
               .rerank(reranker=reranker))

    # Execute the query; the truthiness check must happen on the materialised
    # documents, not on the query builder (which is always truthy)
    documents = results.to_pydantic(Document)
    if documents:
        context_chunks = [doc.text for doc in documents]
        answer = get_coherent_answer(question, context_chunks)
        log_interaction(question, context_chunks, answer)
        return answer
    else:
        return "No answer found."
    # if results:
    #     documents = results.to_pydantic(Document)
    #     return [[doc.text, doc.filename, doc.username] for doc in documents]
    # else:
    #     return []

# # Define the query
# query = "What is Chihiro's new name given to her by the witch?"

# # Perform the search and rerank the results
# results = (tbl.search(query, query_type="hybrid")  # Hybrid means text + vector
#            .limit(10)  # Get 10 results from first-pass retrieval
#            .rerank(reranker=reranker)
#           )

# # Print the results
# print(results)
# print(results.to_pydantic(Document))
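
# Minimal usage sketch. This assumes the commented-out ingestion step above has
# already been run once (so "my_table" is populated and the FTS index exists) and
# that send_openai.get_coherent_answer is configured with valid API credentials.
if __name__ == "__main__":
    sample_question = "What is Chihiro's new name given to her by the witch?"
    print(search_question(sample_question))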