|
import logging
import os

import openai
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc
|
# Read the API key from the environment instead of hard-coding a secret in
# source control. An async client lets the LightRAG callbacks below await
# OpenAI calls.
client = openai.AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])

# Surface LightRAG's internal progress messages on the console.
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
# LightRAG persists its key-value store, vector indexes, and graph files here.
WORKING_DIR = os.path.join(os.getcwd(), "data")
|
# Embedding callback for LightRAG: takes a list of strings and returns one
# embedding vector per input text.
async def openai_embedding(texts):
    response = await client.embeddings.create(
        model="text-embedding-ada-002", input=texts
    )
    return [item.embedding for item in response.data]
|
# Completion callback with the signature LightRAG expects: the prompt plus an
# optional system prompt and prior conversation turns. Extra keyword arguments
# passed in by LightRAG are accepted but not forwarded to OpenAI.
async def gpt_4o_complete(prompt, system_prompt=None, history_messages=None, **kwargs):
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})
    response = await client.chat.completions.create(
        model="gpt-4o", messages=messages
    )
    # With the v1 SDK the response is a typed object, not a dict.
    return response.choices[0].message.content


rag = LightRAG(
    working_dir=WORKING_DIR,
    chunk_token_size=1200,
    llm_model_func=gpt_4o_complete,
    llm_model_name="gpt-4o",  # keep the advertised name consistent with the model actually called
    llm_model_max_async=4,
    llm_model_max_token_size=32768,
    embedding_func=EmbeddingFunc(
        embedding_dim=1536,  # text-embedding-ada-002 produces 1536-dimensional vectors
        max_token_size=8192,
        func=openai_embedding,
    ),
)
|
|
|
|
|
# Concatenate every file in the april2024/ directory into a single string.
input_dir = "april2024"
text = ""
for file_name in os.listdir(input_dir):
    with open(os.path.join(input_dir, file_name), "r", encoding="utf-8") as f:
        text += f.read() + "\n"
|
|
|
print("Done reading files") |
|
|
|
|
|
try:
    print("Inserting text into LightRAG")
    rag.insert(text)
    print("Done inserting text")
except Exception as e:
    print(f"Error while inserting text into LightRAG: {e}")
    print("Please check the server logs or configuration.")
|
|