from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone  # NOTE(review): shadowed by the `pinecone` import below — confirm which Pinecone is intended
from pinecone import Pinecone, ServerlessSpec
from tqdm.notebook import tqdm
import langchain
import openai
from openai import OpenAI
import string
import pandas as pd
import urllib.request
from io import BytesIO
from PIL import Image
import pillow_heif
from itertools import islice
from sklearn.metrics.pairwise import cosine_similarity
import gc
import ast
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import streamlit as st
import re
import Levenshtein
import os

# API keys are read from Streamlit secrets. The original code printed
# OPENAI_KEY to stdout — that leaks the secret into logs, so it is removed.
OPENAI_KEY = st.secrets['OPENAI_KEY']
openai.api_key = OPENAI_KEY
pc_apikey = st.secrets['pc_apikey']

openai_client = OpenAI(api_key=openai.api_key)


def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    """Return the embedding vector for ``text`` using the given OpenAI model.

    Newlines are collapsed to spaces first, as recommended for the
    text-embedding-ada-002 model.
    """
    text = text.replace("\n", " ")
    return openai_client.embeddings.create(input=[text], model=model).data[0].embedding


def get_completion(client, prompt, model="gpt-3.5-turbo"):
    """Return a single chat completion for ``prompt``.

    Fix: the original ignored both the ``client`` and ``model`` arguments,
    always calling the module-level ``openai_client`` with hard-coded
    "gpt-4". Callers that relied on the silent gpt-4 override should now
    pass ``model="gpt-4"`` explicitly.
    """
    message = {"role": "user", "content": prompt}
    response = client.chat.completions.create(
        model=model,
        messages=[message],
    )
    return response.choices[0].message.content


def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    """Query ``index`` for the ``top_k`` nearest vectors to ``query_embeddn``.

    The namespace is resolved via ``get_namespace`` — defined elsewhere in
    the project, not visible in this file; verify it accepts a Pinecone
    index. Returns the raw Pinecone query response (values + metadata).
    """
    ns = get_namespace(index)
    return index.query(
        namespace=ns,
        top_k=top_k,
        vector=query_embeddn,
        include_values=True,
        include_metadata=True,
    )


def get_top_k_text(matches, k=5):
    """Join the metadata text of up to ``k`` matches into one string.

    Fix: the original indexed matches[0..4] unconditionally, which raised
    IndexError whenever Pinecone returned fewer than 5 matches. Now only
    the matches actually returned (capped at ``k``) are used, and an empty
    or missing 'matches' list yields "".
    """
    hits = matches.get('matches') or []
    return ' '.join(m['metadata']['text'] for m in hits[:k])


def is_Yes(response) -> bool:
    """True if ``response`` is closer (Levenshtein ratio) to "Yes" than "No"."""
    similarityYes = Levenshtein.ratio("Yes", response)
    similarityNo = Levenshtein.ratio("No", response)
    return similarityYes > similarityNo


def contains_sorry(response) -> bool:
    """True if the literal substring "Sorry" appears in ``response`` (case-sensitive)."""
    return "Sorry" in response