import os

import numpy as np
import openai
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc

# Read the OpenAI API key from the environment instead of hardcoding it.
# (The original embedded a live key in source; never commit secrets.)
# The async client is required because LightRAG awaits these callbacks.
client = openai.AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Define the working directory and make sure it exists
WORKING_DIR = os.path.join(os.getcwd(), "data")
os.makedirs(WORKING_DIR, exist_ok=True)

# Define the embedding function using OpenAI
async def openai_embedding(texts):
    response = await client.embeddings.create(
        model="text-embedding-ada-002", input=texts
    )
    # LightRAG expects an array-like of embedding vectors
    return np.array([item.embedding for item in response.data])

# Completion function with the signature LightRAG passes to llm_model_func
# (prompt plus optional system prompt and conversation history)
async def gpt_4o_complete(prompt, system_prompt=None, history_messages=[], **kwargs):
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})
    response = await client.chat.completions.create(model="gpt-4o", messages=messages)
    # openai v1.x returns response objects, not dicts
    return response.choices[0].message.content

# Define the LightRAG configuration
rag = LightRAG(
    working_dir=WORKING_DIR,
    chunk_token_size=1200,
    llm_model_func=gpt_4o_complete,
    llm_model_name="gpt-4o",  # match the model actually called above
    llm_model_max_async=4,
    llm_model_max_token_size=32768,
    embedding_func=EmbeddingFunc(
        embedding_dim=1536,  # text-embedding-ada-002 outputs 1536-dimensional embeddings
        max_token_size=8192,
        func=openai_embedding,
    ),
)

# Read and combine files from the directory
file_paths = [os.path.join("april2024", name) for name in os.listdir("april2024")]
text = ""
for file_path in file_paths:
    with open(file_path, "r", encoding="utf-8") as f:
        text += f.read() + "\n"
print("Done reading files")

# Insert text into LightRAG with error handling
try:
    print("Inserting text into LightRAG")
    rag.insert(text)
    print("Done inserting text")
except Exception as e:
    print(f"Error while inserting text into LightRAG: {e}")
    print("Please check the server logs or configuration.")
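
# QueryParam is imported above but never used; here is a minimal sketch of
# querying the populated index. The question string is illustrative, and
# "hybrid" is one of LightRAG's documented retrieval modes
# (naive / local / global / hybrid).
try:
    answer = rag.query(
        "What are the main themes across the April 2024 documents?",
        param=QueryParam(mode="hybrid"),
    )
    print(answer)
except Exception as e:
    print(f"Error while querying LightRAG: {e}")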