import os
from operator import itemgetter
from typing import List

import pandas as pd
from dotenv import load_dotenv

from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import TestsetGenerator
from ragas import evaluate, EvaluationDataset
from ragas.metrics import (
    LLMContextRecall,
    Faithfulness,
    FactualCorrectness,
    ResponseRelevancy,
    ContextEntityRecall,
    NoiseSensitivity
)

# Load environment variables
load_dotenv()
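
# Optional sanity check: the ChatOpenAI and OpenAIEmbeddings calls below read
# OPENAI_API_KEY from the environment, so fail fast if it is missing.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError("OPENAI_API_KEY is not set; add it to your .env file.")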

# Define the source URLs and load the pages as documents
urls = [
    "https://www.timeout.com/london/things-to-do-in-london-this-weekend",
    "https://www.timeout.com/london/london-events-in-march"
]

loader = WebBaseLoader(urls)
docs = loader.load()

# Text splitting
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=50,
    length_function=len
)
split_documents = text_splitter.split_documents(docs)
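
# Quick visibility into corpus size; chunk counts depend on the page content
# at fetch time, so treat these numbers as run-specific.
print(f"Loaded {len(docs)} pages and split them into {len(split_documents)} chunks")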

# Initialize the three embedding models under comparison: an OpenAI baseline,
# the off-the-shelf Snowflake Arctic model, and a fine-tuned Arctic checkpoint
openai_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
base_embeddings = HuggingFaceEmbeddings(model_name="Snowflake/snowflake-arctic-embed-l")
finetuned_embeddings = HuggingFaceEmbeddings(model_name="ric9176/cjo-ft-v0")
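
# Note (assuming the default sentence-transformers backend): the HuggingFace
# models run locally and are downloaded from the Hub on first use; pass
# model_kwargs={"device": "cuda"} to HuggingFaceEmbeddings if a GPU is available.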

def create_rag_chain(documents: List[Document], embeddings, k: int = 6):
    """Build a FAISS-backed RAG chain that retrieves k chunks with the given embeddings."""
    # Create vector store and retriever
    vectorstore = FAISS.from_documents(documents, embeddings)
    retriever = vectorstore.as_retriever(search_kwargs={"k": k})
    
    # Create RAG prompt
    rag_prompt = ChatPromptTemplate.from_template("""
    Answer the question using only the provided context.
    If the context does not contain the answer, state that you do not know.

    Context:
    {context}

    Question:
    {question}

    Answer:
    """)
    
    # Create LLM
    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
    
    # Build the chain: retrieve documents for the question, then return both
    # the generated answer and the retrieved documents (the documents are
    # needed later to populate RAGAS retrieved_contexts). The prompt receives
    # the Document list directly, which LangChain stringifies.
    rag_chain = (
        RunnableParallel(
            context=itemgetter("question") | retriever,
            question=itemgetter("question"),
        )
        | {"response": rag_prompt | llm | StrOutputParser(), "context": itemgetter("context")}
    )
    
    return rag_chain
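
# Illustrative usage (commented out so the script does not spend extra API
# calls; the question is a made-up example). A chain returns a dict with
# "response" (the answer string) and "context" (the retrieved Documents):
# demo_chain = create_rag_chain(split_documents, openai_embeddings)
# print(demo_chain.invoke({"question": "What events are on in London this weekend?"})["response"])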

def evaluate_embeddings(documents: List[Document]):
    """Evaluate a RAG chain built on each embedding model with RAGAS."""
    results = {}
    
    # Create RAG chains for each embedding model
    chains = {
        "OpenAI": create_rag_chain(documents, openai_embeddings),
        "Base Arctic": create_rag_chain(documents, base_embeddings),
        "Fine-tuned Arctic": create_rag_chain(documents, finetuned_embeddings)
    }
    
    # Generate one synthetic test dataset with RAGAS and reuse it for every
    # model, so all chains are scored on the same questions
    generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
    generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
    generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
    dataset = generator.generate_with_langchain_docs(documents, testset_size=10)
    
    # One evaluator LLM, shared across models
    evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
    
    # Evaluate each model
    for model_name, chain in chains.items():
        print(f"\nEvaluating {model_name}...")
        
        # Process questions through RAG pipeline
        for test_row in dataset:
            response = chain.invoke({"question": test_row.eval_sample.user_input})
            test_row.eval_sample.response = response["response"]
            test_row.eval_sample.retrieved_contexts = [
                context.page_content for context in response["context"]
            ]
        
        # Convert to evaluation dataset
        evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())
        
        # Run RAGAS evaluation with the shared evaluator LLM
        result = evaluate(
            dataset=evaluation_dataset,
            metrics=[
                LLMContextRecall(),
                Faithfulness(),
                FactualCorrectness(),
                ResponseRelevancy(),
                ContextEntityRecall(),
                NoiseSensitivity()
            ],
            llm=evaluator_llm
        )
        
        results[model_name] = result
    
    return results

# Run evaluation
print("Starting evaluation of embedding models...")
results = evaluate_embeddings(split_documents)

# Save results
print("\nSaving results...")
os.makedirs("docs", exist_ok=True)

# Save detailed results for each model
for model_name, result in results.items():
    df = result.to_pandas()
    filename = f"docs/evaluation_{model_name.lower().replace(' ', '_')}.csv"
    df.to_csv(filename, index=False)
    print(f"Saved results for {model_name} to {filename}")

# Create the comparison table: one column per model, mean score per metric.
# In recent RAGAS versions result.scores holds per-sample dicts, so aggregate
# from the per-sample DataFrame instead of using it directly.
comparison = pd.DataFrame()
for model_name, result in results.items():
    comparison[model_name] = result.to_pandas().select_dtypes("number").mean()

# Save comparison
comparison.to_csv("docs/embedding_comparison.csv")
print("\nSaved comparison to docs/embedding_comparison.csv")

# Print comparison
print("\nEmbedding Models Comparison:")
print(comparison)
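
# Optional, purely cosmetic: with several metrics, a transposed, rounded view
# (models as rows) is often easier to scan in the terminal.
print("\nTransposed view:")
print(comparison.T.round(3))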