File size: 1,854 Bytes
2cbbef6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from PIL import Image
import io

from database import get_db_connection, INDEX_FILE, check_if_indexed
from security import decrypt_data

# Name of the SentenceTransformer model used to embed search queries.
# NOTE(review): presumably this must match the model used when the FAISS index
# was built, otherwise query and index vectors are not comparable -- confirm.
MODEL_NAME = 'clip-ViT-B-32'

def search(query, k=1):
    """
    Search the multimodal FAISS index and return the matching chunks.

    The query is embedded with the model named by ``MODEL_NAME`` and compared
    against the index stored in ``INDEX_FILE``. Every hit is resolved to a row
    of the ``chunks`` table, decrypted, and decoded according to its
    ``content_type``.

    Args:
        query: The query to embed (the query can be text). It is passed to
            ``model.encode`` as a one-element batch.
        k: Number of nearest neighbours to request from the index.

    Returns:
        A list of dicts in the order FAISS returned the hits, each with keys:
        ``'distance'`` (the FAISS distance for the hit), ``'content'`` (a
        ``str`` for text chunks, a PIL ``Image`` for image chunks), ``'type'``
        (the chunk's content type) and ``'page'`` (the chunk's ``page_num``).
        An empty list is returned when nothing has been indexed yet.
    """
    if not check_if_indexed():
        return []

    model = SentenceTransformer(MODEL_NAME)
    index = faiss.read_index(INDEX_FILE)

    # Create an embedding for the text query; FAISS expects float32 vectors.
    query_embedding = model.encode([query]).astype('float32')
    distances, indices = index.search(query_embedding, k)

    results = []
    conn = get_db_connection()
    try:
        for distance, faiss_id in zip(distances[0], indices[0]):
            # FAISS pads the result with -1 when fewer than k vectors match.
            if faiss_id == -1:
                continue

            # The faiss_id is the row number, which corresponds to the
            # chunk's 1-based primary key 'id'.
            sql_id = int(faiss_id) + 1
            chunk_record = conn.execute(
                'SELECT * FROM chunks WHERE id = ?', (sql_id,)
            ).fetchone()
            if not chunk_record:
                continue

            content_type = chunk_record['content_type']
            # Skip content types we cannot decode. Without this guard the
            # result would either raise UnboundLocalError or silently reuse
            # the content of the previous hit.
            if content_type not in ('text', 'image'):
                continue

            decrypted_content_bytes = decrypt_data(chunk_record['encrypted_content'])

            # Prepare content based on its type.
            if content_type == 'text':
                content = decrypted_content_bytes.decode('utf-8')
            else:
                content = Image.open(io.BytesIO(decrypted_content_bytes))

            results.append({
                'distance': distance,
                'content': content,
                'type': content_type,
                'page': chunk_record['page_num'],
            })
    finally:
        # Always release the connection, even if decryption or decoding fails.
        conn.close()
    return results