from typing import List

import json

from ..utils.llm import create_chat_completion
from ..prompts import curate_sources as rank_sources_prompt
from ..actions import stream_output


class SourceCurator:
    """Ranks sources and curates data based on their relevance, credibility and reliability."""

    def __init__(self, researcher):
        self.researcher = researcher
    async def curate_sources(
        self,
        source_data: List,
        max_results: int = 10,
    ) -> List:
        """
        Rank sources based on the research query and curation guidelines.

        The research query itself is read from self.researcher.query.

        Args:
            source_data: List of source documents to rank
            max_results: Maximum number of top sources to return

        Returns:
            List: The curated sources, or the original source_data if curation fails
        """
print(f"\n\nCurating {len(source_data)} sources: {source_data}") | |
if self.researcher.verbose: | |
await stream_output( | |
"logs", | |
"research_plan", | |
f"βοΈ Evaluating and curating sources by credibility and relevance...", | |
self.researcher.websocket, | |
) | |

        response = ""
        try:
            # Ask the smart LLM to rank and filter the sources; the prompt
            # instructs it to return a JSON-encoded list.
            response = await create_chat_completion(
                model=self.researcher.cfg.smart_llm_model,
                messages=[
                    {"role": "system", "content": f"{self.researcher.role}"},
                    {"role": "user", "content": rank_sources_prompt(
                        self.researcher.query, source_data, max_results)},
                ],
                temperature=0.2,
                max_tokens=8000,
                llm_provider=self.researcher.cfg.smart_llm_provider,
                llm_kwargs=self.researcher.cfg.llm_kwargs,
                cost_callback=self.researcher.add_costs,
            )

            curated_sources = json.loads(response)
            print(f"\n\nFinal curated sources ({len(curated_sources)} of {len(source_data)}): {curated_sources}")

            if self.researcher.verbose:
                await stream_output(
                    "logs",
                    "research_plan",
                    f"🏅 Verified and ranked top {len(curated_sources)} most reliable sources",
                    self.researcher.websocket,
                )

            return curated_sources
        except Exception as e:
            # Fall back to the unranked sources if the LLM call or JSON parsing fails.
            print(f"Error in curate_sources from LLM response: {response}")
            if self.researcher.verbose:
                await stream_output(
                    "logs",
                    "research_plan",
                    f"🚫 Source verification failed: {str(e)}",
                    self.researcher.websocket,
                )
            return source_data
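

# Usage sketch (illustrative): SourceCurator expects a researcher object that
# exposes cfg, query, role, verbose, websocket, and add_costs, as used above.
# The names below (GPTResearcher, the query string, the source dict shape) are
# assumptions for the example, not guaranteed by this module.
#
#     import asyncio
#     from gpt_researcher import GPTResearcher
#
#     async def main():
#         researcher = GPTResearcher(query="impact of solar storms on GPS accuracy")
#         curator = SourceCurator(researcher)
#         sources = [
#             {"url": "https://example.com/a", "raw_content": "..."},
#             {"url": "https://example.com/b", "raw_content": "..."},
#         ]
#         top_sources = await curator.curate_sources(sources, max_results=5)
#         print(top_sources)
#
#     asyncio.run(main())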