# Shreyas094's picture
# Upload 528 files
# 372531f verified
import os
from enum import Enum
from typing import Any, Dict, List, Optional
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import Document
from langchain.schema.retriever import BaseRetriever
class SearchAPIRetriever(BaseRetriever):
    """Retriever that exposes pre-fetched search result pages as documents.

    The query is not used for filtering here: every entry in ``pages`` is
    converted to a ``Document`` and returned in its original order.
    """

    # Each page is a dict read through the keys "raw_content", "title" and
    # "url"; any of them may be missing.
    pages: List[Dict] = []

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Convert every stored page into a ``Document``.

        Args:
            query: Not used by this implementation.
            run_manager: Not used by this implementation.

        Returns:
            One ``Document`` per entry of ``pages``; ``page_content`` comes
            from "raw_content" and the metadata carries "title" and "source"
            (taken from the page's "url").
        """
        docs = []
        for page in self.pages:
            # dict.get's default only applies when the key is absent. A key
            # that is present but set to None would slip through, so treat
            # it exactly like a missing key.
            content = page.get("raw_content")
            if content is None:
                content = ""
            title = page.get("title")
            if title is None:
                title = ""
            source = page.get("url")
            if source is None:
                source = ""
            docs.append(
                Document(
                    page_content=content,
                    metadata={
                        "title": title,
                        "source": source,
                    },
                )
            )
        return docs
class SectionRetriever(BaseRetriever):
    """Retriever over already-written report sections.

    This class is used to retrieve sections while avoiding redundant
    subtopics. The query is not used for filtering here: every entry in
    ``sections`` is converted to a ``Document`` and returned in order.
    """

    # sections example:
    # [
    #     {
    #         "section_title": "Example Title",
    #         "written_content": "Example content"
    #     },
    #     ...
    # ]
    sections: List[Dict] = []

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Convert every stored section into a ``Document``.

        Args:
            query: Not used by this implementation.
            run_manager: Not used by this implementation.

        Returns:
            One ``Document`` per entry of ``sections``; ``page_content``
            comes from "written_content" and the metadata carries
            "section_title".
        """
        docs = []
        for section in self.sections:
            # dict.get's default only applies when the key is absent. A key
            # that is present but set to None would slip through, so treat
            # it exactly like a missing key.
            content = section.get("written_content")
            if content is None:
                content = ""
            title = section.get("section_title")
            if title is None:
                title = ""
            docs.append(
                Document(
                    page_content=content,
                    metadata={
                        "section_title": title,
                    },
                )
            )
        return docs