import os

import faiss
import numpy as np
import openai
import pandas as pd

# Semantic search over a CSV file: embed each row's text with the OpenAI
# embeddings endpoint, store the vectors in a FAISS L2 index, then print the
# rows closest to a question typed by the user.

# CSV file expected in the current working directory.
CSV_FILENAME = "Learning_Pathway_Index.csv"
# Column of the CSV whose text is embedded and searched.
TEXT_COLUMN = "text_column"
# Embedding model; the index dimension is derived from its output, not assumed.
EMBEDDING_MODEL = "text-embedding-ada-002"
# Number of texts sent per embeddings request.
EMBEDDING_BATCH_SIZE = 100
# Number of similar rows to retrieve.
TOP_K = 5


def as_float32_matrix(vectors):
    """Return *vectors* as a C-contiguous 2-D float32 array.

    FAISS only accepts 2-D float32 input for both ``add`` and ``search``, so
    a single flat vector is promoted to shape ``(1, dim)``.

    Raises:
        ValueError: if *vectors* is empty or is not 1-D / 2-D.
    """
    matrix = np.ascontiguousarray(vectors, dtype="float32")
    if matrix.size == 0:
        raise ValueError("no vectors to convert")
    if matrix.ndim == 1:
        matrix = matrix.reshape(1, -1)
    if matrix.ndim != 2:
        raise ValueError(f"expected 1-D or 2-D vectors, got {matrix.ndim}-D")
    return matrix


def embed_texts(texts):
    """Embed every string in *texts* and return a ``(len(texts), dim)`` matrix.

    Uses the embeddings endpoint (the completions endpoint returns generated
    text, not vectors) and sends the texts in batches instead of one request
    per row.

    Raises:
        ValueError: if *texts* is empty.
    """
    texts = list(texts)
    embeddings = []
    for start in range(0, len(texts), EMBEDDING_BATCH_SIZE):
        batch = texts[start:start + EMBEDDING_BATCH_SIZE]
        # Legacy (pre-1.0) SDK call, matching the SDK generation this file
        # was written against.
        response = openai.Embedding.create(model=EMBEDDING_MODEL, input=batch)
        # Each item carries the position of its input; sort so that row i of
        # the result always corresponds to texts[i].
        ordered = sorted(response["data"], key=lambda item: item["index"])
        embeddings.extend(item["embedding"] for item in ordered)
    return as_float32_matrix(embeddings)


def vectorize_user_question(question):
    """Embed a single question and return it as a ``(1, dim)`` float32 matrix.

    Raises:
        ValueError: if *question* is empty or only whitespace.
    """
    if not question or not question.strip():
        raise ValueError("question must not be empty")
    return embed_texts([question])


def build_index(vectors):
    """Create a flat L2 FAISS index sized to *vectors* and add them to it."""
    vectors = as_float32_matrix(vectors)
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index


def rows_for_hits(data, hit_indices):
    """Return the rows of *data* at the given positional *hit_indices*.

    FAISS pads a result list with ``-1`` when it finds fewer than ``k``
    neighbours; those are dropped because ``iloc[-1]`` would silently return
    the last row instead.
    """
    positions = [int(i) for i in hit_indices if i >= 0]
    return data.iloc[positions]


def find_similar_rows(data, index, query_vector, k=TOP_K):
    """Return up to *k* rows of *data* nearest to *query_vector* in *index*.

    Row ``i`` of *data* must be the row whose vector was added to *index* at
    position ``i``.
    """
    # Never ask for more neighbours than the index holds.
    k = min(k, index.ntotal)
    if k <= 0:
        return data.iloc[0:0]
    _distances, indices = index.search(as_float32_matrix(query_vector), k)
    return rows_for_hits(data, indices[0])


def main():
    """Build the index from the CSV, ask for one question, print the matches."""
    # The API key is process-wide configuration: set it once, not per row.
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # os.path.join keeps the path valid on every platform.
    data = pd.read_csv(os.path.join(os.getcwd(), CSV_FILENAME))
    # Rows without text cannot be embedded; drop them up front so that FAISS
    # positions line up with positional (iloc) rows of `data`.
    data = data.dropna(subset=[TEXT_COLUMN])

    vectors = embed_texts(data[TEXT_COLUMN].astype(str))
    index = build_index(vectors)

    user_question = input("Ask a question: ")
    user_vector = vectorize_user_question(user_question)

    similar_items = find_similar_rows(data, index, user_vector, TOP_K)
    print(similar_items)


if __name__ == "__main__":
    main()