Spaces:
Runtime error
Runtime error
import json | |
import ast | |
import os | |
import pinecone | |
from pydantic import Field | |
from vector_db import Document | |
from html_parser import HTMLParser | |
from langchain.vectorstores import Pinecone | |
from config import PINECONE_API_KEY, PINECONE_ENVIRONMENT, INDEX_NAME | |
from config import EMBEDDING_API_BASE, EMBEDDING_API_KEY, OPENAI_API_TYPE, OPENAI_API_VERSION, EMBEDDING_DEPLOYMENT_ID | |
from langchain.embeddings import OpenAIEmbeddings | |
# Connect the Pinecone client. The API key is listed at app.pinecone.io and
# the environment name is shown next to the key in the console.
pinecone.init(environment=PINECONE_ENVIRONMENT, api_key=PINECONE_API_KEY)

# Embedding model served through an Azure OpenAI deployment; every setting
# comes from config except the request batch size (chunk_size).
embeddings = OpenAIEmbeddings(
    chunk_size=16,
    deployment=EMBEDDING_DEPLOYMENT_ID,
    openai_api_type=OPENAI_API_TYPE,
    openai_api_version=OPENAI_API_VERSION,
    openai_api_base=EMBEDDING_API_BASE,
    openai_api_key=EMBEDDING_API_KEY,
)

# Create the index on first run only. The name check comes first so that
# the existing indexes are not listed when no index name is configured.
if INDEX_NAME:
    if INDEX_NAME not in pinecone.list_indexes():
        # NOTE(review): dimension=1536 presumably matches the vector size of
        # the configured embedding deployment - confirm before changing models.
        pinecone.create_index(INDEX_NAME, dimension=1536, metric="cosine")
        print(f"Index {INDEX_NAME} created successfully")

# Handle to the (possibly just created) index.
index = pinecone.Index(INDEX_NAME)
# Load the scraped posts from data.json and upload them to the Pinecone index.
#
# data.json is expected to describe a mapping whose values each carry a
# "content" and a "post_url" entry. Two encodings are accepted:
#   * a JSON string that itself holds a Python-literal mapping (the only form
#     the original code handled), decoded with ast.literal_eval, which parses
#     literals only and never executes code;
#   * a plain JSON object, which is used as-is (previously this raised
#     ValueError inside ast.literal_eval).
# Read as UTF-8 explicitly so the result does not depend on the platform's
# default locale encoding.
with open('data.json', encoding="utf-8") as json_file:
    data = json.load(json_file)
datas = ast.literal_eval(data) if isinstance(data, str) else data

# One Document per post: the post text becomes the page content and the post
# URL is stored as its "source" metadata. The mapping keys are not needed.
texts = [
    Document(page_content=post["content"], metadata={"source": post["post_url"]})
    for post in datas.values()
]

# Only call Pinecone (and only report success) when there is something to add.
if texts:
    Pinecone.from_documents(texts, embeddings, index_name=INDEX_NAME)
    message = f"Add files to {INDEX_NAME} sucessfully"
    print(message)