Spaces:
Running
Running
File size: 1,705 Bytes
372531f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import os
from enum import Enum
from typing import Any, Dict, List, Optional
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import Document
from langchain.schema.retriever import BaseRetriever
class SearchAPIRetriever(BaseRetriever):
    """Retriever that serves already-fetched search result pages as documents.

    The retriever performs no searching of its own: every entry stored in
    ``pages`` is converted to a ``Document`` and returned, whatever the query.
    """

    # Each entry is a dict; the keys read below are "raw_content", "title"
    # and "url". Any of them may be absent, in which case "" is used.
    pages: List[Dict] = []

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Convert every stored page into a ``Document``.

        Args:
            query: Not used by this implementation.
            run_manager: Not used by this implementation.

        Returns:
            One ``Document`` per entry in ``self.pages``, in the same order.
            The page's "raw_content" becomes the document text; its "title"
            and "url" are exposed as the "title" and "source" metadata keys.
        """
        documents: List[Document] = []
        for entry in self.pages:
            text = entry.get("raw_content", "")
            info = {
                "title": entry.get("title", ""),
                "source": entry.get("url", ""),
            }
            documents.append(Document(page_content=text, metadata=info))
        return documents
class SectionRetriever(BaseRetriever):
    """Retriever that serves previously written sections as documents.

    The original author describes this class as being used to retrieve
    sections while avoiding redundant subtopics. No filtering or
    de-duplication happens here: every entry in ``sections`` is returned as a
    ``Document``, whatever the query.
    """

    # Expected shape of ``sections``:
    # [
    #     {
    #         "section_title": "Example Title",
    #         "written_content": "Example content"
    #     },
    #     ...
    # ]
    # Missing keys fall back to "" when the documents are built.
    sections: List[Dict] = []

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Convert every stored section into a ``Document``.

        Args:
            query: Not used by this implementation.
            run_manager: Not used by this implementation.

        Returns:
            One ``Document`` per entry in ``self.sections``, in the same
            order. The section's "written_content" becomes the document text
            and its "section_title" is exposed under the same metadata key.
        """
        documents: List[Document] = []
        for section in self.sections:
            body = section.get("written_content", "")
            info = {
                "section_title": section.get("section_title", ""),
            }
            documents.append(Document(page_content=body, metadata=info))
        return documents