import os from enum import Enum from typing import Any, Dict, List, Optional from langchain.callbacks.manager import CallbackManagerForRetrieverRun from langchain.schema import Document from langchain.schema.retriever import BaseRetriever class SearchAPIRetriever(BaseRetriever): """Search API retriever.""" pages: List[Dict] = [] def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: docs = [ Document( page_content=page.get("raw_content", ""), metadata={ "title": page.get("title", ""), "source": page.get("url", ""), }, ) for page in self.pages ] return docs class SectionRetriever(BaseRetriever): """ SectionRetriever: This class is used to retrieve sections while avoiding redundant subtopics. """ sections: List[Dict] = [] """ sections example: [ { "section_title": "Example Title", "written_content": "Example content" }, ... ] """ def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: docs = [ Document( page_content=page.get("written_content", ""), metadata={ "section_title": page.get("section_title", ""), }, ) for page in self.sections # Changed 'self.pages' to 'self.sections' ] return docs