import os

import numpy as np
import openai
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc

# Read the OpenAI API key from the environment instead of hardcoding it.
# (The original embedded a live key in source; never commit secrets.)
# The async client is required because LightRAG awaits these callbacks.
client = openai.AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Define the working directory and make sure it exists
WORKING_DIR = os.path.join(os.getcwd(), "data")
os.makedirs(WORKING_DIR, exist_ok=True)

# Define the embedding function using OpenAI
async def openai_embedding(texts):
    response = await client.embeddings.create(
        model="text-embedding-ada-002", input=texts
    )
    # LightRAG expects an array-like of embedding vectors
    return np.array([item.embedding for item in response.data])

# Completion function with the signature LightRAG passes to llm_model_func
# (prompt plus optional system prompt and conversation history)
async def gpt_4o_complete(prompt, system_prompt=None, history_messages=[], **kwargs):
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})
    response = await client.chat.completions.create(model="gpt-4o", messages=messages)
    # openai v1.x returns response objects, not dicts
    return response.choices[0].message.content

# Define the LightRAG configuration
rag = LightRAG(
    working_dir=WORKING_DIR,
    chunk_token_size=1200,
    llm_model_func=gpt_4o_complete,
    llm_model_name="gpt-4o",  # match the model actually called above
    llm_model_max_async=4,
    llm_model_max_token_size=32768,
    embedding_func=EmbeddingFunc(
        embedding_dim=1536,  # text-embedding-ada-002 outputs 1536-dimensional embeddings
        max_token_size=8192,
        func=openai_embedding,
    ),
)

# Read and combine files from the directory
file_paths = [os.path.join("april2024", name) for name in os.listdir("april2024")]
text = ""
for file_path in file_paths:
    with open(file_path, "r", encoding="utf-8") as f:
        text += f.read() + "\n"
print("Done reading files")

# Insert text into LightRAG with error handling
try:
    print("Inserting text into LightRAG")
    rag.insert(text)
    print("Done inserting text")
except Exception as e:
    print(f"Error while inserting text into LightRAG: {e}")
    print("Please check the server logs or configuration.")
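
# QueryParam is imported above but never used; here is a minimal sketch of
# querying the populated index. The question string is illustrative, and
# "hybrid" is one of LightRAG's documented retrieval modes
# (naive / local / global / hybrid).
try:
    answer = rag.query(
        "What are the main themes across the April 2024 documents?",
        param=QueryParam(mode="hybrid"),
    )
    print(answer)
except Exception as e:
    print(f"Error while querying LightRAG: {e}")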