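# Generate a synthetic evaluation test set with Ragas, using Nebius AI Studio models
# accessed through Haystack's OpenAI-compatible components.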
from ragas.llms.haystack_wrapper import HaystackLLMWrapper
from ragas.embeddings.haystack_wrapper import HaystackEmbeddingsWrapper
from haystack.components.generators.openai import OpenAIGenerator
from haystack.components.embedders import OpenAITextEmbedder
from haystack.utils import Secret
import json
import os
from dotenv import load_dotenv

load_dotenv()  # This loads variables from .env into the environment
api_key = Secret.from_token(os.getenv("NEBIUS_API_KEY"))
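
# Wrap the Nebius-hosted chat model so Ragas can use it to synthesize questions and answers.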
llm = HaystackLLMWrapper(OpenAIGenerator(
    api_base_url="https://api.studio.nebius.com/v1/",
    model="meta-llama/Llama-3.3-70B-Instruct",
    api_key=api_key,
))
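
# Wrap the embedding model the same way; Ragas uses it to embed documents during generation.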
embedding = HaystackEmbeddingsWrapper(OpenAITextEmbedder(
    api_base_url="https://api.studio.nebius.com/v1/",
    model="BAAI/bge-en-icl",
    api_key=api_key,
))
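
# Load the programme records that the test set will be generated from.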
from ragas.testset import TestsetGenerator
from langchain_core.documents import Document as LCDocument

file_path = "ltu_programme_data.json"
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)
print(f"Successfully loaded {len(data)} records from {file_path}")
lcdocs = []
for item in data:
    lcdocs.append(LCDocument(page_content=item['content']))

# If no documents were loaded, provide a helpful message
if not lcdocs:
    print(f"No documents found in {file_path}. Make sure the file contains records with a 'content' field.")
generator = TestsetGenerator(llm=llm, embedding_model=embedding)
dataset = generator.generate_with_langchain_docs(lcdocs, testset_size=10)

# Save the generated test samples to a JSONL file
dataset.to_jsonl("testset.jsonl")
print(f"Saved {len(dataset)} test samples to testset.jsonl")