# Streamlit RAG app: OpenAI embeddings + Pinecone vector search.
# (Removed non-code hosting-page header that was pasted above the source.)
# --- Imports -----------------------------------------------------------------
# Standard library
import ast
import gc
import os
import re
import string
import urllib.request
from io import BytesIO
from itertools import islice

# Third-party
import langchain
import Levenshtein
import openai
import pandas as pd
import pillow_heif
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
# NOTE(review): the original imported langchain.vectorstores.Pinecone as
# `Pinecone`, which was then silently shadowed by the pinecone-client import
# below. Aliased so both remain reachable; the name `Pinecone` still refers to
# the pinecone-client class, exactly as it effectively did before.
from langchain.vectorstores import Pinecone as LangchainPinecone
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI
from PIL import Image
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from tqdm.notebook import tqdm

# --- API keys ----------------------------------------------------------------
# Keys come from Streamlit secrets (the app's secrets.toml), not from local
# files or environment variables.
openai.api_key = st.secrets['OPENAI_KEY']
pc_apikey = st.secrets['pc_apikey']
openai_client = OpenAI(api_key=openai.api_key)
# Function to get the embeddings of the text using OpenAI text-embedding-ada-002 model
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    """Return the embedding vector for *text* via the given OpenAI client.

    Newlines are collapsed to single spaces before embedding, per OpenAI's
    recommendation for the embeddings endpoint.

    Args:
        openai_client: An OpenAI client exposing ``embeddings.create``.
        text: The text to embed.
        model: Embedding model name.

    Returns:
        The embedding as a list of floats.
    """
    text = text.replace("\n", " ")
    return openai_client.embeddings.create(input=[text], model=model).data[0].embedding
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    """Send *prompt* as a single user message and return the reply text.

    Bug fixed: the original ignored both parameters — it always used the
    module-level ``openai_client`` instead of *client*, and hardcoded
    ``"gpt-4"`` instead of honoring *model*. Callers that passed an explicit
    client or model were silently overridden. NOTE(review): with the fix the
    effective default model becomes "gpt-3.5-turbo" (as the signature always
    declared) rather than the hardcoded "gpt-4" — confirm that is intended.

    Args:
        client: An OpenAI client exposing ``chat.completions.create``.
        prompt: User prompt text.
        model: Chat model name.

    Returns:
        The assistant's reply content as a string.
    """
    message = {"role": "user", "content": prompt}
    response = client.chat.completions.create(
        model=model,
        messages=[message],
    )
    return response.choices[0].message.content
def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    """Run a similarity search against *index* within its namespace.

    Args:
        index: A Pinecone index handle.
        query_embeddn: Query embedding vector.
        top_k: Number of nearest matches to return.

    Returns:
        The Pinecone query response, with match values and metadata included.
    """
    namespace = get_namespace(index)
    return index.query(
        namespace=namespace,
        vector=query_embeddn,
        top_k=top_k,
        include_values=True,
        include_metadata=True,
    )
def get_top_k_text(matches, top_k=None):
    """Join the ``text`` metadata of the query matches into one string.

    Bug fixed: the original always indexed matches 0..4 and raised
    IndexError whenever the query returned fewer than five matches.

    Args:
        matches: A Pinecone query response dict with a ``'matches'`` list,
            each entry carrying ``['metadata']['text']``.
        top_k: Use at most this many matches; ``None`` (default) uses all.

    Returns:
        The match texts joined with single spaces ('' when there are none).
    """
    hits = matches.get('matches') or []
    if top_k is not None:
        hits = hits[:top_k]
    return ' '.join(hit['metadata']['text'] for hit in hits)
def is_Yes(response) -> bool:
    """Return True when *response* is closer to "Yes" than to "No".

    Closeness is measured with the Levenshtein ratio, so fuzzy variants
    like "yes." still classify correctly.
    """
    return Levenshtein.ratio("Yes", response) > Levenshtein.ratio("No", response)
def contains_sorry(response) -> bool:
    """Return True when the literal substring "Sorry" occurs in *response*.

    Case-sensitive: lowercase "sorry" does not match.
    """
    return response.find("Sorry") >= 0