Spaces:
Sleeping
Sleeping
File size: 1,854 Bytes
2cbbef6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from PIL import Image
import io
from database import get_db_connection, INDEX_FILE, check_if_indexed
from security import decrypt_data
# Name of the SentenceTransformer model that search() loads to embed queries.
# NOTE(review): presumably this must be the same model that was used to build
# the FAISS index stored at INDEX_FILE — confirm against the indexing code.
MODEL_NAME = 'clip-ViT-B-32'
def search(query, k=1):
    """
    Search the multimodal FAISS index with a text query.

    The query is embedded with the SentenceTransformer model named by
    MODEL_NAME, the FAISS index stored at INDEX_FILE is searched for the
    ``k`` nearest vectors, and every hit is resolved to a row of the
    ``chunks`` table whose encrypted content is decrypted before it is
    returned.

    Args:
        query: Text to search for.
        k: Number of nearest neighbours to request from the index.

    Returns:
        A list of dicts, in the order FAISS returned the hits, each with
        the keys ``distance`` (the value FAISS reported for the hit),
        ``content`` (``str`` for text chunks, a PIL image for image
        chunks), ``type`` (the row's ``content_type``) and ``page`` (the
        row's ``page_num``). The list is empty when ``check_if_indexed()``
        reports that nothing has been indexed. Hits without a matching
        row, or whose ``content_type`` is neither ``'text'`` nor
        ``'image'``, are skipped.
    """
    if not check_if_indexed():
        return []

    model = SentenceTransformer(MODEL_NAME)
    index = faiss.read_index(INDEX_FILE)

    # Create an embedding for the text query; the array is cast to float32
    # before being handed to the index.
    query_embedding = model.encode([query]).astype('float32')
    distances, indices = index.search(query_embedding, k)

    results = []
    conn = get_db_connection()
    try:
        # Only one query vector was submitted, so row 0 holds all the hits.
        for distance, faiss_id in zip(distances[0], indices[0]):
            # FAISS reports -1 for result slots it could not fill.
            if faiss_id == -1:
                continue

            # The faiss_id is the row number, which corresponds to the
            # chunk's primary key 'id' (0-based vs. 1-based).
            sql_id = int(faiss_id) + 1
            chunk_record = conn.execute(
                'SELECT * FROM chunks WHERE id = ?', (sql_id,)
            ).fetchone()
            if not chunk_record:
                continue

            content_type = chunk_record['content_type']
            if content_type not in ('text', 'image'):
                # Unknown type: skip it rather than fail with a NameError or
                # silently reuse the content of the previous hit.
                continue

            decrypted_content_bytes = decrypt_data(
                chunk_record['encrypted_content']
            )

            # Prepare content based on its type.
            if content_type == 'text':
                content = decrypted_content_bytes.decode('utf-8')
            else:
                content = Image.open(io.BytesIO(decrypted_content_bytes))

            results.append({
                'distance': distance,
                'content': content,
                'type': content_type,
                'page': chunk_record['page_num'],
            })
    finally:
        # Release the connection even if a lookup, decrypt or decode fails.
        conn.close()

    return results
|