# axa / light.py
# Mayara Ayat
# Upload folder using huggingface_hub
# f7ab812 verified
import os
import logging
import openai
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc
# Set OpenAI API Key.
# SECURITY: the original file hard-coded a live secret key here (twice).
# A key committed to source control is compromised and must be revoked and
# rotated; read it from the environment instead.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")
client = openai.Client(api_key=openai.api_key)
# Define working directory (created/used by LightRAG under ./data)
WORKING_DIR = os.path.join(os.getcwd(), "data")
# Define the embedding function using OpenAI
async def openai_embedding(texts):
    """Embed a batch of texts with OpenAI's `text-embedding-ada-002`.

    Returns one embedding vector (list of floats) per input text, in order.
    """
    result = await client.embeddings.create(
        model="text-embedding-ada-002",
        input=texts,
    )
    return [entry.embedding for entry in result.data]
# Define the LightRAG configuration
def _gpt4o_complete(prompt):
    """Send *prompt* to gpt-4o and return the reply text.

    The OpenAI v1 SDK returns a typed ChatCompletion object, not a dict;
    the original dict-style access (response["choices"]...) raised
    TypeError at runtime, so attributes are used here.
    """
    response = client.chat.completions.create(
        model="gpt-4o", messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content


rag = LightRAG(
    working_dir=WORKING_DIR,
    chunk_token_size=1200,
    llm_model_func=_gpt4o_complete,
    llm_model_name="gpt-4o",  # keep consistent with the model actually called
    llm_model_max_async=4,
    llm_model_max_token_size=32768,
    embedding_func=EmbeddingFunc(
        # `text-embedding-ada-002` outputs 1536-dimensional embeddings;
        # the original declared 768, which mismatches the vectors stored.
        embedding_dim=1536,
        max_token_size=8192,
        func=openai_embedding,
    ),
)
# Read and combine files from the directory
files_paths = [os.path.join("april2024", name) for name in os.listdir("april2024")]
parts = []
for file_path in files_paths:
    # Open in text mode with an explicit encoding instead of reading raw
    # bytes and decoding by hand.
    with open(file_path, encoding="utf-8") as f:
        parts.append(f.read())
# Join once (each file followed by a newline) rather than quadratic `+=`.
text = "".join(content + "\n" for content in parts)
print("Done reading files")
# Insert text into LightRAG with error handling
try:
    print("Inserting text into LightRAG")
    rag.insert(text)
except Exception as e:
    print(f"Error while inserting text into LightRAG: {e}")
    print("Please check the server logs or configuration.")
else:
    # Only reached when the insert completed without raising.
    print("Done inserting text")