# mini_project2/setup_code.py
from langchain_openai import OpenAIEmbeddings
# Alias the LangChain vector store so it does not shadow the Pinecone client class imported below.
from langchain.vectorstores import Pinecone as LangchainPinecone
from pinecone import Pinecone, ServerlessSpec
from tqdm.notebook import tqdm
import langchain
import openai
from openai import OpenAI
import string
import pandas as pd
import urllib.request
from io import BytesIO
from PIL import Image
import pillow_heif
from itertools import islice
from sklearn.metrics.pairwise import cosine_similarity
import gc
import ast
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import streamlit as st
import re
import Levenshtein
# from google.colab import drive
# from dotenv import load_dotenv, find_dotenv
import os
# open_ai_key_file = "/content/gdrive/MyDrive/LLM_Winter2024/open_ai_key.txt"  # Your OpenAI key in this file
# with open(open_ai_key_file, "r") as f:
#     for line in f:
#         OPENAI_KEY = line.strip()
#         OPEN_AI_API_KEY = line
#         break
# _ = load_dotenv(find_dotenv())
# Get the OpenAI and Pinecone API keys from Streamlit secrets
OPENAI_KEY = st.secrets['OPENAI_KEY']
openai.api_key = OPENAI_KEY
pc_apikey = st.secrets['pc_apikey']
openai_client = OpenAI(api_key=openai.api_key)
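# Sketch (assumption, not part of the original file): how a Pinecone client and index
# would typically be created from pc_apikey; the index name below is a placeholder.
# pc = Pinecone(api_key=pc_apikey)
# index = pc.Index("your-index-name")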
# Get the embedding of a text string using the OpenAI text-embedding-ada-002 model.
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    return openai_client.embeddings.create(input=[text], model=model).data[0].embedding
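# Example usage (hypothetical query text):
# query_embedding = get_openai_embedding(openai_client, "What does the course cover?")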
# Get a chat completion for a single user prompt. The default stays on gpt-4, which the
# original call hardcoded; the client and model arguments are now actually used.
def get_completion(client, prompt, model="gpt-4"):
    message = {"role": "user", "content": prompt}
    response = client.chat.completions.create(
        model=model,
        messages=[message]
    )
    return response.choices[0].message.content
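# Example usage (hypothetical prompt):
# answer = get_completion(openai_client, "Summarize the retrieved context in one sentence.")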
# Query the Pinecone index with a query embedding and return the top_k matches
# (including values and metadata). Note: get_namespace() is assumed to be defined
# elsewhere in the project.
def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    ns = get_namespace(index)
    return index.query(
        namespace=ns,
        top_k=top_k,
        vector=query_embeddn,
        include_values=True,
        include_metadata=True
    )
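# Example usage (assumes `index` is a connected Pinecone index, as sketched above):
# matches = query_pinecone_vector_store(index, query_embedding, top_k=5)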
# Join the text metadata of the returned matches into a single context string.
# Iterates over whatever was returned instead of assuming exactly five matches.
def get_top_k_text(matches):
    text_list = []
    for match in matches.get('matches'):
        text_list.append(match['metadata']['text'])
    return ' '.join(text_list)
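# Example usage (assumes `matches` from query_pinecone_vector_store above):
# context = get_top_k_text(matches)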
# Decide whether a model response is closer to "Yes" than to "No", using the Levenshtein ratio.
def is_Yes(response) -> bool:
    similarityYes = Levenshtein.ratio("Yes", response)
    similarityNo = Levenshtein.ratio("No", response)
    return similarityYes > similarityNo
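# Example: is_Yes("Yes") -> True and is_Yes("No") -> False, since each literal matches itself exactly.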
# Detect a refusal-style response (one containing "Sorry").
def contains_sorry(response) -> bool:
    return "Sorry" in response