# Chat_QnA_v2 / process_fb.py
import json
import ast
import os
import pinecone
from pydantic import Field
from vector_db import Document
from html_parser import HTMLParser
from langchain.vectorstores import Pinecone
from config import PINECONE_API_KEY, PINECONE_ENVIRONMENT, INDEX_NAME
from config import EMBEDDING_API_BASE, EMBEDDING_API_KEY, OPENAI_API_TYPE, OPENAI_API_VERSION, EMBEDDING_DEPLOYMENT_ID
from langchain.embeddings import OpenAIEmbeddings
# Initialize the Pinecone client
pinecone.init(
    api_key=PINECONE_API_KEY,          # find at app.pinecone.io
    environment=PINECONE_ENVIRONMENT,  # next to api key in console
)
# Azure embedding model definition
embeddings = OpenAIEmbeddings(
    deployment=EMBEDDING_DEPLOYMENT_ID,
    openai_api_key=EMBEDDING_API_KEY,
    openai_api_base=EMBEDDING_API_BASE,
    openai_api_type=OPENAI_API_TYPE,
    openai_api_version=OPENAI_API_VERSION,
    chunk_size=16,
)
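# Note: chunk_size=16 keeps embedding batches small; Azure OpenAI deployments
# commonly cap the number of inputs per embedding request, so a small batch
# size keeps each call within that limit (an assumption about this deployment).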
# Create the index if it does not exist yet (1536 dims matches the embedding model)
if INDEX_NAME and INDEX_NAME not in pinecone.list_indexes():
    pinecone.create_index(
        INDEX_NAME,
        metric="cosine",
        dimension=1536,
    )
    print(f"Index {INDEX_NAME} created successfully")
index = pinecone.Index(INDEX_NAME)
with open('data.json') as json_file:
    data = json.load(json_file)
datas = ast.literal_eval(data)
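# Assumed shape of data.json (not shown here): a JSON-encoded string whose
# contents are a dict literal keyed by post id, e.g.
#   "{'123': {'content': 'post text', 'post_url': 'https://...'}}"
# which is why the file is json.load-ed first and then ast.literal_eval-ed.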
texts = []
for k, v in datas.items():
    content = v["content"]
    post_url = v["post_url"]
    texts.append(Document(page_content=content, metadata={"source": post_url}))
if len(texts) > 0:
    # Embed the documents and upsert them into the Pinecone index
    Pinecone.from_documents(texts, embeddings, index_name=INDEX_NAME)
    message = f"Added documents to {INDEX_NAME} successfully"
    print(message)
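# A minimal sketch of querying the populated index (not part of the original
# script; assumes Pinecone.from_existing_index and similarity_search are
# available in the installed LangChain version):
#
#   docsearch = Pinecone.from_existing_index(INDEX_NAME, embeddings)
#   results = docsearch.similarity_search("example question about a post", k=3)
#   for doc in results:
#       print(doc.metadata["source"], doc.page_content[:100])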