remove google scholar
Browse files- app.py +5 -54
- consilium_mcp +1 -0
- enhanced_search_functions.py +2 -8
- requirements.txt +1 -2
- research_tools/__init__.py +0 -2
- research_tools/base_tool.py +0 -1
- research_tools/research_agent.py +3 -5
- research_tools/scholar_search.py +0 -248
- test_research_tools.py +1 -4
    	
        app.py
    CHANGED
    
    | @@ -392,33 +392,10 @@ class VisualConsensusEngine: | |
| 392 | 
             
                            self.update_research_progress(f"Wikipedia search complete - found {len(result)} characters")
         | 
| 393 |  | 
| 394 | 
             
                        elif function_name == "search_academic":
         | 
| 395 | 
            -
                             | 
| 396 | 
            -
                            
         | 
| 397 | 
            -
                             | 
| 398 | 
            -
             | 
| 399 | 
            -
                                self.update_research_progress("Searching academic papers on arXiv...")
         | 
| 400 | 
            -
                                result = self.search_agent.tools['arxiv'].search(arguments["query"])
         | 
| 401 | 
            -
                                self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
         | 
| 402 | 
            -
                                
         | 
| 403 | 
            -
                            elif source == "scholar":
         | 
| 404 | 
            -
                                self.update_research_progress("Connecting to Google Scholar...")
         | 
| 405 | 
            -
                                self.update_research_progress("Searching peer-reviewed research...")
         | 
| 406 | 
            -
                                result = self.search_agent.tools['scholar'].search(arguments["query"])
         | 
| 407 | 
            -
                                self.update_research_progress(f"Google Scholar search complete - found {len(result)} characters")
         | 
| 408 | 
            -
                                
         | 
| 409 | 
            -
                            else:  # both sources
         | 
| 410 | 
            -
                                self.update_research_progress("Connecting to arXiv preprint server...")
         | 
| 411 | 
            -
                                self.update_research_progress("Searching academic papers on arXiv...")
         | 
| 412 | 
            -
                                arxiv_result = self.search_agent.tools['arxiv'].search(arguments["query"])
         | 
| 413 | 
            -
                                self.update_research_progress(f"arXiv complete ({len(arxiv_result)} chars) - now searching Google Scholar...")
         | 
| 414 | 
            -
                                
         | 
| 415 | 
            -
                                self.update_research_progress("Connecting to Google Scholar...")
         | 
| 416 | 
            -
                                self.update_research_progress("Searching peer-reviewed research...")
         | 
| 417 | 
            -
                                scholar_result = self.search_agent.tools['scholar'].search(arguments["query"])
         | 
| 418 | 
            -
                                self.update_research_progress("Combining arXiv and Google Scholar results...")
         | 
| 419 | 
            -
                                
         | 
| 420 | 
            -
                                result = f"{arxiv_result}\n\n{scholar_result}"
         | 
| 421 | 
            -
                                self.update_research_progress(f"Academic search complete - combined {len(result)} characters")
         | 
| 422 |  | 
| 423 | 
             
                        elif function_name == "search_technology_trends":
         | 
| 424 | 
             
                            self.update_research_progress("Connecting to GitHub API...")
         | 
| @@ -1477,7 +1454,7 @@ with gr.Blocks(title="π Consilium: Multi-AI Expert Consensus Platform", theme | |
| 1477 | 
             
                * Visual roundtable of the AI models, including speech bubbles to see the discussion in real time.
         | 
| 1478 | 
             
                * MCP mode enabled to also use it directly in, for example, Claude Desktop (without the visual table).
         | 
| 1479 | 
             
                * Includes Mistral (**mistral-large-latest**) via their API and the Models **DeepSeek-R1**, **Meta-Llama-3.3-70B-Instruct** and **QwQ-32B** via the SambaNova API.
         | 
| 1480 | 
            -
                * Research Agent with 6 sources (**Web Search**, **Wikipedia**, **arXiv**, **GitHub**, **SEC EDGAR | 
| 1481 | 
             
                * Assign different roles to the models, the protocol they should follow, and decide the communication strategy.
         | 
| 1482 | 
             
                * Pick one model as the lead analyst (had the best results when picking Mistral).
         | 
| 1483 | 
             
                * Configure the amount of discussion rounds.
         | 
| @@ -1704,32 +1681,6 @@ with gr.Blocks(title="π Consilium: Multi-AI Expert Consensus Platform", theme | |
| 1704 | 
             
                    """)
         | 
| 1705 |  | 
| 1706 | 
             
                with gr.Tab("π Documentation"):
         | 
| 1707 | 
            -
                    gr.Markdown("""
         | 
| 1708 | 
            -
                    ## 🔬 **Research Capabilities**
         | 
| 1709 | 
            -
             | 
| 1710 | 
            -
                    ### **π Multi-Source Research**
         | 
| 1711 | 
            -
                    - **DuckDuckGo Web Search**: Current events, news, real-time information
         | 
| 1712 | 
            -
                    - **Wikipedia**: Authoritative background and encyclopedic data  
         | 
| 1713 | 
            -
                    - **arXiv**: Academic papers and scientific research preprints
         | 
| 1714 | 
            -
                    - **Google Scholar**: Peer-reviewed research and citation analysis
         | 
| 1715 | 
            -
                    - **GitHub**: Technology trends, adoption patterns, developer activity
         | 
| 1716 | 
            -
                    - **SEC EDGAR**: Public company financial data and regulatory filings
         | 
| 1717 | 
            -
             | 
| 1718 | 
            -
                    ### **🎯 Smart Research Routing**
         | 
| 1719 | 
            -
                    The system automatically routes queries to the most appropriate sources:
         | 
| 1720 | 
            -
                    - **Academic queries** → arXiv + Google Scholar
         | 
| 1721 | 
            -
                    - **Technology questions** → GitHub + Web Search
         | 
| 1722 | 
            -
                    - **Company research** → SEC filings + Web Search  
         | 
| 1723 | 
            -
                    - **Current events** → Web Search + Wikipedia
         | 
| 1724 | 
            -
                    - **Deep research** → Multi-source synthesis with quality scoring
         | 
| 1725 | 
            -
             | 
| 1726 | 
            -
                    ### **π Research Quality Scoring**
         | 
| 1727 | 
            -
                    Each research result is scored on:
         | 
| 1728 | 
            -
                    - **Recency** (0-1): How current is the information
         | 
| 1729 | 
            -
                    - **Authority** (0-1): Source credibility and reliability
         | 
| 1730 | 
            -
                    - **Specificity** (0-1): Quantitative data and specific details
         | 
| 1731 | 
            -
                    - **Relevance** (0-1): How well it matches the query
         | 
| 1732 | 
            -
                    """)
         | 
| 1733 | 
             
                    gr.Markdown("""
         | 
| 1734 | 
             
                    ## π **Expert Role Assignments**
         | 
| 1735 |  | 
|  | |
| 392 | 
             
                            self.update_research_progress(f"Wikipedia search complete - found {len(result)} characters")
         | 
| 393 |  | 
| 394 | 
             
                        elif function_name == "search_academic":
         | 
| 395 | 
            +
                            self.update_research_progress("Connecting to arXiv preprint server...")
         | 
| 396 | 
            +
                            self.update_research_progress("Searching academic papers on arXiv...")
         | 
| 397 | 
            +
                            result = self.search_agent.tools['arxiv'].search(arguments["query"])
         | 
| 398 | 
            +
                            self.update_research_progress(f"arXiv search complete - found {len(result)} characters")
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 399 |  | 
| 400 | 
             
                        elif function_name == "search_technology_trends":
         | 
| 401 | 
             
                            self.update_research_progress("Connecting to GitHub API...")
         | 
|  | |
| 1454 | 
             
                * Visual roundtable of the AI models, including speech bubbles to see the discussion in real time.
         | 
| 1455 | 
             
                * MCP mode enabled to also use it directly in, for example, Claude Desktop (without the visual table).
         | 
| 1456 | 
             
                * Includes Mistral (**mistral-large-latest**) via their API and the Models **DeepSeek-R1**, **Meta-Llama-3.3-70B-Instruct** and **QwQ-32B** via the SambaNova API.
         | 
| 1457 | 
            +
                * Research Agent with 5 sources (**Web Search**, **Wikipedia**, **arXiv**, **GitHub**, **SEC EDGAR**) for comprehensive live research.
         | 
| 1458 | 
             
                * Assign different roles to the models, the protocol they should follow, and decide the communication strategy.
         | 
| 1459 | 
             
                * Pick one model as the lead analyst (had the best results when picking Mistral).
         | 
| 1460 | 
             
                * Configure the amount of discussion rounds.
         | 
|  | |
| 1681 | 
             
                    """)
         | 
| 1682 |  | 
| 1683 | 
             
                with gr.Tab("π Documentation"):
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1684 | 
             
                    gr.Markdown("""
         | 
| 1685 | 
             
                    ## π **Expert Role Assignments**
         | 
| 1686 |  | 
    	
        consilium_mcp
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            Subproject commit 883815f94aa0a2cba5d9bf5ea89db12fd75a1676
         | 
    	
        enhanced_search_functions.py
    CHANGED
    
    | @@ -48,19 +48,13 @@ ENHANCED_SEARCH_FUNCTIONS = [ | |
| 48 | 
             
                    "type": "function",
         | 
| 49 | 
             
                    "function": {
         | 
| 50 | 
             
                        "name": "search_academic",
         | 
| 51 | 
            -
                        "description": "Search academic papers and research on arXiv  | 
| 52 | 
             
                        "parameters": {
         | 
| 53 | 
             
                            "type": "object",
         | 
| 54 | 
             
                            "properties": {
         | 
| 55 | 
             
                                "query": {
         | 
| 56 | 
             
                                    "type": "string",
         | 
| 57 | 
             
                                    "description": "Academic research query to find peer-reviewed papers and scientific studies"
         | 
| 58 | 
            -
                                },
         | 
| 59 | 
            -
                                "source": {
         | 
| 60 | 
            -
                                    "type": "string",
         | 
| 61 | 
            -
                                    "enum": ["arxiv", "scholar", "both"],
         | 
| 62 | 
            -
                                    "description": "Academic source to search - arXiv for preprints, Scholar for citations, both for comprehensive",
         | 
| 63 | 
            -
                                    "default": "both"
         | 
| 64 | 
             
                                }
         | 
| 65 | 
             
                            },
         | 
| 66 | 
             
                            "required": ["query"]
         | 
| @@ -117,7 +111,7 @@ ENHANCED_SEARCH_FUNCTIONS = [ | |
| 117 | 
             
                                    "type": "array",
         | 
| 118 | 
             
                                    "items": {
         | 
| 119 | 
             
                                        "type": "string",
         | 
| 120 | 
            -
                                        "enum": ["web", "wikipedia", "arxiv", " | 
| 121 | 
             
                                    },
         | 
| 122 | 
             
                                    "description": "Priority list of sources to focus on for this research",
         | 
| 123 | 
             
                                    "default": []
         | 
|  | |
| 48 | 
             
                    "type": "function",
         | 
| 49 | 
             
                    "function": {
         | 
| 50 | 
             
                        "name": "search_academic",
         | 
| 51 | 
            +
                        "description": "Search academic papers and research on arXiv for scientific evidence",
         | 
| 52 | 
             
                        "parameters": {
         | 
| 53 | 
             
                            "type": "object",
         | 
| 54 | 
             
                            "properties": {
         | 
| 55 | 
             
                                "query": {
         | 
| 56 | 
             
                                    "type": "string",
         | 
| 57 | 
             
                                    "description": "Academic research query to find peer-reviewed papers and scientific studies"
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 58 | 
             
                                }
         | 
| 59 | 
             
                            },
         | 
| 60 | 
             
                            "required": ["query"]
         | 
|  | |
| 111 | 
             
                                    "type": "array",
         | 
| 112 | 
             
                                    "items": {
         | 
| 113 | 
             
                                        "type": "string",
         | 
| 114 | 
            +
                                        "enum": ["web", "wikipedia", "arxiv", "github", "sec"]
         | 
| 115 | 
             
                                    },
         | 
| 116 | 
             
                                    "description": "Priority list of sources to focus on for this research",
         | 
| 117 | 
             
                                    "default": []
         | 
    	
        requirements.txt
    CHANGED
    
    | @@ -7,5 +7,4 @@ python-dotenv | |
| 7 | 
             
            duckduckgo-search
         | 
| 8 | 
             
            wikipedia
         | 
| 9 | 
             
            gradio-consilium-roundtable
         | 
| 10 | 
            -
            openai
         | 
| 11 | 
            -
            scholarly
         | 
|  | |
| 7 | 
             
            duckduckgo-search
         | 
| 8 | 
             
            wikipedia
         | 
| 9 | 
             
            gradio-consilium-roundtable
         | 
| 10 | 
            +
            openai
         | 
|  | 
    	
        research_tools/__init__.py
    CHANGED
    
    | @@ -5,7 +5,6 @@ from .wikipedia_search import WikipediaSearchTool | |
| 5 | 
             
            from .arxiv_search import ArxivSearchTool
         | 
| 6 | 
             
            from .github_search import GitHubSearchTool
         | 
| 7 | 
             
            from .sec_search import SECSearchTool
         | 
| 8 | 
            -
            from .scholar_search import GoogleScholarTool
         | 
| 9 | 
             
            from .research_agent import EnhancedResearchAgent
         | 
| 10 |  | 
| 11 | 
             
            __all__ = [
         | 
| @@ -15,6 +14,5 @@ __all__ = [ | |
| 15 | 
             
                'ArxivSearchTool',
         | 
| 16 | 
             
                'GitHubSearchTool', 
         | 
| 17 | 
             
                'SECSearchTool',
         | 
| 18 | 
            -
                'GoogleScholarTool',
         | 
| 19 | 
             
                'EnhancedResearchAgent'
         | 
| 20 | 
             
            ]
         | 
|  | |
| 5 | 
             
            from .arxiv_search import ArxivSearchTool
         | 
| 6 | 
             
            from .github_search import GitHubSearchTool
         | 
| 7 | 
             
            from .sec_search import SECSearchTool
         | 
|  | |
| 8 | 
             
            from .research_agent import EnhancedResearchAgent
         | 
| 9 |  | 
| 10 | 
             
            __all__ = [
         | 
|  | |
| 14 | 
             
                'ArxivSearchTool',
         | 
| 15 | 
             
                'GitHubSearchTool', 
         | 
| 16 | 
             
                'SECSearchTool',
         | 
|  | |
| 17 | 
             
                'EnhancedResearchAgent'
         | 
| 18 | 
             
            ]
         | 
    	
        research_tools/base_tool.py
    CHANGED
    
    | @@ -65,7 +65,6 @@ class BaseTool(ABC): | |
| 65 | 
             
                    """Check source authority and credibility indicators"""
         | 
| 66 | 
             
                    authority_indicators = {
         | 
| 67 | 
             
                        'arxiv': 0.9,
         | 
| 68 | 
            -
                        'scholar': 0.9,
         | 
| 69 | 
             
                        'sec': 0.95,
         | 
| 70 | 
             
                        'github': 0.7,
         | 
| 71 | 
             
                        'wikipedia': 0.8,
         | 
|  | |
| 65 | 
             
                    """Check source authority and credibility indicators"""
         | 
| 66 | 
             
                    authority_indicators = {
         | 
| 67 | 
             
                        'arxiv': 0.9,
         | 
|  | |
| 68 | 
             
                        'sec': 0.95,
         | 
| 69 | 
             
                        'github': 0.7,
         | 
| 70 | 
             
                        'wikipedia': 0.8,
         | 
    	
        research_tools/research_agent.py
    CHANGED
    
    | @@ -11,7 +11,6 @@ from .wikipedia_search import WikipediaSearchTool | |
| 11 | 
             
            from .arxiv_search import ArxivSearchTool
         | 
| 12 | 
             
            from .github_search import GitHubSearchTool
         | 
| 13 | 
             
            from .sec_search import SECSearchTool
         | 
| 14 | 
            -
            from .scholar_search import GoogleScholarTool
         | 
| 15 |  | 
| 16 |  | 
| 17 | 
             
            class EnhancedResearchAgent:
         | 
| @@ -24,8 +23,7 @@ class EnhancedResearchAgent: | |
| 24 | 
             
                        'wikipedia': WikipediaSearchTool(),
         | 
| 25 | 
             
                        'arxiv': ArxivSearchTool(),
         | 
| 26 | 
             
                        'github': GitHubSearchTool(),
         | 
| 27 | 
            -
                        'sec': SECSearchTool() | 
| 28 | 
            -
                        'scholar': GoogleScholarTool()
         | 
| 29 | 
             
                    }
         | 
| 30 |  | 
| 31 | 
             
                    # Tool availability status
         | 
| @@ -92,7 +90,7 @@ class EnhancedResearchAgent: | |
| 92 | 
             
                    for tool_name, tool in self.tools.items():
         | 
| 93 | 
             
                        if tool.should_use_for_query(query):
         | 
| 94 | 
             
                            # Return first matching tool based on priority order
         | 
| 95 | 
            -
                            priority_order = ['arxiv', 'sec', 'github', ' | 
| 96 | 
             
                            if tool_name in priority_order[:3]:  # High-priority specialized tools
         | 
| 97 | 
             
                                return tool_name
         | 
| 98 |  | 
| @@ -123,7 +121,7 @@ class EnhancedResearchAgent: | |
| 123 | 
             
                    # Ensure we don't overwhelm with too many sources
         | 
| 124 | 
             
                    if len(relevant_tools) > 4:
         | 
| 125 | 
             
                        # Prioritize specialized tools
         | 
| 126 | 
            -
                        priority_order = ['arxiv', 'sec', 'github', ' | 
| 127 | 
             
                        relevant_tools = [tool for tool in priority_order if tool in relevant_tools][:4]
         | 
| 128 |  | 
| 129 | 
             
                    return relevant_tools
         | 
|  | |
| 11 | 
             
            from .arxiv_search import ArxivSearchTool
         | 
| 12 | 
             
            from .github_search import GitHubSearchTool
         | 
| 13 | 
             
            from .sec_search import SECSearchTool
         | 
|  | |
| 14 |  | 
| 15 |  | 
| 16 | 
             
            class EnhancedResearchAgent:
         | 
|  | |
| 23 | 
             
                        'wikipedia': WikipediaSearchTool(),
         | 
| 24 | 
             
                        'arxiv': ArxivSearchTool(),
         | 
| 25 | 
             
                        'github': GitHubSearchTool(),
         | 
| 26 | 
            +
                        'sec': SECSearchTool()
         | 
|  | |
| 27 | 
             
                    }
         | 
| 28 |  | 
| 29 | 
             
                    # Tool availability status
         | 
|  | |
| 90 | 
             
                    for tool_name, tool in self.tools.items():
         | 
| 91 | 
             
                        if tool.should_use_for_query(query):
         | 
| 92 | 
             
                            # Return first matching tool based on priority order
         | 
| 93 | 
            +
                            priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
         | 
| 94 | 
             
                            if tool_name in priority_order[:3]:  # High-priority specialized tools
         | 
| 95 | 
             
                                return tool_name
         | 
| 96 |  | 
|  | |
| 121 | 
             
                    # Ensure we don't overwhelm with too many sources
         | 
| 122 | 
             
                    if len(relevant_tools) > 4:
         | 
| 123 | 
             
                        # Prioritize specialized tools
         | 
| 124 | 
            +
                        priority_order = ['arxiv', 'sec', 'github', 'wikipedia', 'web']
         | 
| 125 | 
             
                        relevant_tools = [tool for tool in priority_order if tool in relevant_tools][:4]
         | 
| 126 |  | 
| 127 | 
             
                    return relevant_tools
         | 
    	
        research_tools/scholar_search.py
    DELETED
    
    | @@ -1,248 +0,0 @@ | |
| 1 | 
            -
            """
         | 
| 2 | 
            -
            Google Scholar Search Tool for academic research
         | 
| 3 | 
            -
            """
         | 
| 4 | 
            -
            from .base_tool import BaseTool
         | 
| 5 | 
            -
            from typing import List, Dict, Optional
         | 
| 6 | 
            -
             | 
| 7 | 
            -
            try:
         | 
| 8 | 
            -
                from scholarly import scholarly
         | 
| 9 | 
            -
                SCHOLARLY_AVAILABLE = True
         | 
| 10 | 
            -
            except ImportError:
         | 
| 11 | 
            -
                SCHOLARLY_AVAILABLE = False
         | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
            class GoogleScholarTool(BaseTool):
         | 
| 15 | 
            -
                """Search Google Scholar for academic research papers"""
         | 
| 16 | 
            -
                
         | 
| 17 | 
            -
                def __init__(self):
         | 
| 18 | 
            -
                    super().__init__("Google Scholar", "Search Google Scholar for academic research papers and citations")
         | 
| 19 | 
            -
                    self.available = SCHOLARLY_AVAILABLE
         | 
| 20 | 
            -
                    self.rate_limit_delay = 3.0  # Be very respectful to Google Scholar
         | 
| 21 | 
            -
                
         | 
| 22 | 
            -
                def search(self, query: str, max_results: int = 4, **kwargs) -> str:
         | 
| 23 | 
            -
                    """Search Google Scholar for research papers"""
         | 
| 24 | 
            -
                    if not self.available:
         | 
| 25 | 
            -
                        return self._unavailable_response(query)
         | 
| 26 | 
            -
                    
         | 
| 27 | 
            -
                    self.rate_limit()
         | 
| 28 | 
            -
                    
         | 
| 29 | 
            -
                    try:
         | 
| 30 | 
            -
                        # Search for publications with timeout handling
         | 
| 31 | 
            -
                        search_query = scholarly.search_pubs(query)
         | 
| 32 | 
            -
                        
         | 
| 33 | 
            -
                        papers = []
         | 
| 34 | 
            -
                        for i, paper in enumerate(search_query):
         | 
| 35 | 
            -
                            if i >= max_results:
         | 
| 36 | 
            -
                                break
         | 
| 37 | 
            -
                            papers.append(paper)
         | 
| 38 | 
            -
                        
         | 
| 39 | 
            -
                        if papers:
         | 
| 40 | 
            -
                            result = f"**Google Scholar Research for: {query}**\n\n"
         | 
| 41 | 
            -
                            result += self._format_scholar_results(papers)
         | 
| 42 | 
            -
                            result += self._analyze_research_quality(papers)
         | 
| 43 | 
            -
                            return result
         | 
| 44 | 
            -
                        else:
         | 
| 45 | 
            -
                            return f"**Google Scholar Research for: {query}**\n\nNo relevant academic papers found."
         | 
| 46 | 
            -
                            
         | 
| 47 | 
            -
                    except Exception as e:
         | 
| 48 | 
            -
                        error_msg = str(e)
         | 
| 49 | 
            -
                        if "blocked" in error_msg.lower() or "captcha" in error_msg.lower():
         | 
| 50 | 
            -
                            return f"**Google Scholar Research for: {query}**\n\nGoogle Scholar is temporarily blocking automated requests. This is normal behavior. Academic research is available through other sources like arXiv."
         | 
| 51 | 
            -
                        elif "timeout" in error_msg.lower():
         | 
| 52 | 
            -
                            return f"**Google Scholar Research for: {query}**\n\nRequest timeout - Google Scholar may be experiencing high load. Academic research available but slower than expected."
         | 
| 53 | 
            -
                        else:
         | 
| 54 | 
            -
                            return self.format_error_response(query, str(e))
         | 
| 55 | 
            -
                
         | 
| 56 | 
            -
                def _unavailable_response(self, query: str) -> str:
         | 
| 57 | 
            -
                    """Response when scholarly library is not available"""
         | 
| 58 | 
            -
                    result = f"**Google Scholar Research for: {query}**\n\n"
         | 
| 59 | 
            -
                    result += "**Library Not Available**\n"
         | 
| 60 | 
            -
                    result += "Google Scholar integration requires the 'scholarly' library.\n\n"
         | 
| 61 | 
            -
                    result += "**Installation Instructions:**\n"
         | 
| 62 | 
            -
                    result += "```bash\n"
         | 
| 63 | 
            -
                    result += "pip install scholarly\n"
         | 
| 64 | 
            -
                    result += "```\n\n"
         | 
| 65 | 
            -
                    result += "**Alternative Academic Sources:**\n"
         | 
| 66 | 
            -
                    result += "• arXiv (for preprints and technical papers)\n"
         | 
| 67 | 
            -
                    result += "• PubMed (for medical and life sciences)\n"
         | 
| 68 | 
            -
                    result += "• IEEE Xplore (for engineering and computer science)\n"
         | 
| 69 | 
            -
                    result += "• JSTOR (for humanities and social sciences)\n\n"
         | 
| 70 | 
            -
                    result += "**Research Recommendation:**\n"
         | 
| 71 | 
            -
                    result += f"For the query '{query}', consider searching:\n"
         | 
| 72 | 
            -
                    result += "• Recent academic publications\n"
         | 
| 73 | 
            -
                    result += "• Peer-reviewed research articles\n"
         | 
| 74 | 
            -
                    result += "• Citation networks and impact metrics\n\n"
         | 
| 75 | 
            -
                    
         | 
| 76 | 
            -
                    return result
         | 
| 77 | 
            -
                
         | 
| 78 | 
            -
                def _format_scholar_results(self, papers: List[Dict]) -> str:
         | 
| 79 | 
            -
                    """Format Google Scholar search results"""
         | 
| 80 | 
            -
                    result = ""
         | 
| 81 | 
            -
                    
         | 
| 82 | 
            -
                    for i, paper in enumerate(papers, 1):
         | 
| 83 | 
            -
                        # Extract paper information safely with better handling
         | 
| 84 | 
            -
                        title = paper.get('title', paper.get('bib', {}).get('title', 'Unknown Title'))
         | 
| 85 | 
            -
                        
         | 
| 86 | 
            -
                        # Handle authors more robustly
         | 
| 87 | 
            -
                        authors = self._format_authors(paper.get('author', paper.get('bib', {}).get('author', [])))
         | 
| 88 | 
            -
                        
         | 
| 89 | 
            -
                        # Get year from multiple possible locations
         | 
| 90 | 
            -
                        year = (paper.get('year') or 
         | 
| 91 | 
            -
                               paper.get('bib', {}).get('pub_year') or 
         | 
| 92 | 
            -
                               paper.get('bib', {}).get('year') or 
         | 
| 93 | 
            -
                               'Unknown Year')
         | 
| 94 | 
            -
                        
         | 
| 95 | 
            -
                        # Get venue from multiple possible locations
         | 
| 96 | 
            -
                        venue = (paper.get('venue') or 
         | 
| 97 | 
            -
                                paper.get('bib', {}).get('venue') or 
         | 
| 98 | 
            -
                                paper.get('bib', {}).get('journal') or 
         | 
| 99 | 
            -
                                paper.get('bib', {}).get('booktitle') or 
         | 
| 100 | 
            -
                                'Unknown Venue')
         | 
| 101 | 
            -
                        
         | 
| 102 | 
            -
                        citations = paper.get('num_citations', paper.get('citedby', 0))
         | 
| 103 | 
            -
                        
         | 
| 104 | 
            -
                        result += f"**Paper {i}: {title}**\n"
         | 
| 105 | 
            -
                        result += f"Authors: {authors}\n"
         | 
| 106 | 
            -
                        result += f"Year: {year} | Venue: {venue}\n"
         | 
| 107 | 
            -
                        result += f"Citations: {citations:,}\n"
         | 
| 108 | 
            -
                        
         | 
| 109 | 
            -
                        # Add abstract if available
         | 
| 110 | 
            -
                        abstract = (paper.get('abstract') or 
         | 
| 111 | 
            -
                                   paper.get('bib', {}).get('abstract') or 
         | 
| 112 | 
            -
                                   paper.get('summary'))
         | 
| 113 | 
            -
                        
         | 
| 114 | 
            -
                        if abstract and len(str(abstract).strip()) > 10:
         | 
| 115 | 
            -
                            abstract_text = str(abstract)
         | 
| 116 | 
            -
                            if len(abstract_text) > 300:
         | 
| 117 | 
            -
                                abstract_text = abstract_text[:300] + "..."
         | 
| 118 | 
            -
                            result += f"Abstract: {abstract_text}\n"
         | 
| 119 | 
            -
                        
         | 
| 120 | 
            -
                        # Add URL if available
         | 
| 121 | 
            -
                        url = (paper.get('url') or 
         | 
| 122 | 
            -
                              paper.get('pub_url') or 
         | 
| 123 | 
            -
                              paper.get('eprint_url'))
         | 
| 124 | 
            -
                        
         | 
| 125 | 
            -
                        if url:
         | 
| 126 | 
            -
                            result += f"URL: {url}\n"
         | 
| 127 | 
            -
                        
         | 
| 128 | 
            -
                        result += "\n"
         | 
| 129 | 
            -
                    
         | 
| 130 | 
            -
                    return result
         | 
| 131 | 
            -
                
         | 
| 132 | 
            -
                def _format_authors(self, authors) -> str:
         | 
| 133 | 
            -
                    """Format author list safely with improved handling"""
         | 
| 134 | 
            -
                    if not authors:
         | 
| 135 | 
            -
                        return "Unknown Authors"
         | 
| 136 | 
            -
                    
         | 
| 137 | 
            -
                    if isinstance(authors, str):
         | 
| 138 | 
            -
                        return authors
         | 
| 139 | 
            -
                    elif isinstance(authors, list):
         | 
| 140 | 
            -
                        # Handle list of author dictionaries or strings
         | 
| 141 | 
            -
                        author_names = []
         | 
| 142 | 
            -
                        for author in authors[:5]:  # Limit to first 5 authors
         | 
| 143 | 
            -
                            if isinstance(author, dict):
         | 
| 144 | 
            -
                                # Try different possible name fields
         | 
| 145 | 
            -
                                name = (author.get('name') or 
         | 
| 146 | 
            -
                                       author.get('full_name') or 
         | 
| 147 | 
            -
                                       author.get('firstname', '') + ' ' + author.get('lastname', '') or
         | 
| 148 | 
            -
                                       str(author))
         | 
| 149 | 
            -
                                name = name.strip()
         | 
| 150 | 
            -
                            else:
         | 
| 151 | 
            -
                                name = str(author).strip()
         | 
| 152 | 
            -
                            
         | 
| 153 | 
            -
                            if name and name != 'Unknown Authors':
         | 
| 154 | 
            -
                                author_names.append(name)
         | 
| 155 | 
            -
                        
         | 
| 156 | 
            -
                        if not author_names:
         | 
| 157 | 
            -
                            return "Unknown Authors"
         | 
| 158 | 
            -
                        
         | 
| 159 | 
            -
                        if len(authors) > 5:
         | 
| 160 | 
            -
                            author_names.append("et al.")
         | 
| 161 | 
            -
                        
         | 
| 162 | 
            -
                        return ", ".join(author_names)
         | 
| 163 | 
            -
                    else:
         | 
| 164 | 
            -
                        return str(authors) if authors else "Unknown Authors"
         | 
| 165 | 
            -
                
         | 
| 166 | 
            -
                def _analyze_research_quality(self, papers: List[Dict]) -> str:
         | 
| 167 | 
            -
                    """Analyze the quality and impact of research results"""
         | 
| 168 | 
            -
                    if not papers:
         | 
| 169 | 
            -
                        return ""
         | 
| 170 | 
            -
                    
         | 
| 171 | 
            -
                    # Calculate citation metrics
         | 
| 172 | 
            -
                    citations = [paper.get('num_citations', 0) for paper in papers]
         | 
| 173 | 
            -
                    total_citations = sum(citations)
         | 
| 174 | 
            -
                    avg_citations = total_citations / len(papers) if papers else 0
         | 
| 175 | 
            -
                    high_impact_papers = sum(1 for c in citations if c > 100)
         | 
| 176 | 
            -
                    
         | 
| 177 | 
            -
                    # Analyze publication years
         | 
| 178 | 
            -
                    years = [paper.get('year') for paper in papers if paper.get('year')]
         | 
| 179 | 
            -
                    recent_papers = sum(1 for year in years if isinstance(year, (int, str)) and str(year) in ['2023', '2024', '2025'])
         | 
| 180 | 
            -
                    
         | 
| 181 | 
            -
                    # Analyze venues
         | 
| 182 | 
            -
                    venues = [paper.get('venue', '') for paper in papers]
         | 
| 183 | 
            -
                    unique_venues = len(set(v for v in venues if v and v != 'Unknown Venue'))
         | 
| 184 | 
            -
                    
         | 
| 185 | 
            -
                    result = f"**Research Quality Analysis:**\n"
         | 
| 186 | 
            -
                    result += f"• Papers analyzed: {len(papers)}\n"
         | 
| 187 | 
            -
                    result += f"• Total citations: {total_citations:,}\n"
         | 
| 188 | 
            -
                    result += f"• Average citations per paper: {avg_citations:.1f}\n"
         | 
| 189 | 
            -
                    result += f"• High-impact papers (>100 citations): {high_impact_papers}\n"
         | 
| 190 | 
            -
                    result += f"• Recent publications (2023-2025): {recent_papers}\n"
         | 
| 191 | 
            -
                    result += f"• Venue diversity: {unique_venues} different publication venues\n"
         | 
| 192 | 
            -
                    
         | 
| 193 | 
            -
                    # Research quality assessment
         | 
| 194 | 
            -
                    if avg_citations > 50:
         | 
| 195 | 
            -
                        quality_level = "High Impact"
         | 
| 196 | 
            -
                    elif avg_citations > 20:
         | 
| 197 | 
            -
                        quality_level = "Moderate Impact"
         | 
| 198 | 
            -
                    elif avg_citations > 5:
         | 
| 199 | 
            -
                        quality_level = "Emerging Research"
         | 
| 200 | 
            -
                    else:
         | 
| 201 | 
            -
                        quality_level = "Early Stage"
         | 
| 202 | 
            -
                    
         | 
| 203 | 
            -
                    result += f"• Research maturity: {quality_level}\n"
         | 
| 204 | 
            -
                    
         | 
| 205 | 
            -
                    # Authority assessment
         | 
| 206 | 
            -
                    if high_impact_papers > 0 and recent_papers > 0:
         | 
| 207 | 
            -
                        authority = "High - Established field with recent developments"
         | 
| 208 | 
            -
                    elif high_impact_papers > 0:
         | 
| 209 | 
            -
                        authority = "Moderate - Established field, may need recent updates"
         | 
| 210 | 
            -
                    elif recent_papers > 0:
         | 
| 211 | 
            -
                        authority = "Emerging - New research area with growing interest"
         | 
| 212 | 
            -
                    else:
         | 
| 213 | 
            -
                        authority = "Limited - Sparse academic coverage"
         | 
| 214 | 
            -
                    
         | 
| 215 | 
            -
                    result += f"• Academic authority: {authority}\n\n"
         | 
| 216 | 
            -
                    
         | 
| 217 | 
            -
                    return result
         | 
| 218 | 
            -
                
         | 
| 219 | 
            -
                def should_use_for_query(self, query: str) -> bool:
         | 
| 220 | 
            -
                    """Google Scholar is good for academic research, citations, and scholarly articles"""
         | 
| 221 | 
            -
                    academic_indicators = [
         | 
| 222 | 
            -
                        'research', 'study', 'academic', 'paper', 'journal', 'peer-reviewed',
         | 
| 223 | 
            -
                        'citation', 'scholar', 'university', 'professor', 'phd', 'thesis',
         | 
| 224 | 
            -
                        'methodology', 'experiment', 'analysis', 'theory', 'empirical',
         | 
| 225 | 
            -
                        'literature review', 'meta-analysis', 'systematic review',
         | 
| 226 | 
            -
                        'conference', 'publication', 'scholarly'
         | 
| 227 | 
            -
                    ]
         | 
| 228 | 
            -
                    
         | 
| 229 | 
            -
                    query_lower = query.lower()
         | 
| 230 | 
            -
                    return any(indicator in query_lower for indicator in academic_indicators)
         | 
| 231 | 
            -
                
         | 
| 232 | 
            -
                def extract_key_info(self, text: str) -> dict:
         | 
| 233 | 
            -
                    """Extract key information from Scholar results"""
         | 
| 234 | 
            -
                    base_info = super().extract_key_info(text)
         | 
| 235 | 
            -
                    
         | 
| 236 | 
            -
                    if text:
         | 
| 237 | 
            -
                        # Look for Scholar-specific patterns
         | 
| 238 | 
            -
                        base_info.update({
         | 
| 239 | 
            -
                            'has_citations': 'Citations:' in text,
         | 
| 240 | 
            -
                            'has_abstracts': 'Abstract:' in text,
         | 
| 241 | 
            -
                            'has_venues': 'Venue:' in text,
         | 
| 242 | 
            -
                            'has_recent_papers': any(year in text for year in ['2023', '2024', '2025']),
         | 
| 243 | 
            -
                            'has_high_impact': any(citation in text for citation in ['100', '200', '500', '1000']),
         | 
| 244 | 
            -
                            'is_available': 'Library Not Available' not in text,
         | 
| 245 | 
            -
                            'paper_count': text.count('**Paper')
         | 
| 246 | 
            -
                        })
         | 
| 247 | 
            -
                    
         | 
| 248 | 
            -
                    return base_info
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        test_research_tools.py
    CHANGED
    
    | @@ -35,7 +35,6 @@ def test_tool_imports(): | |
| 35 | 
             
                    from research_tools.arxiv_search import ArxivSearchTool
         | 
| 36 | 
             
                    from research_tools.github_search import GitHubSearchTool
         | 
| 37 | 
             
                    from research_tools.sec_search import SECSearchTool
         | 
| 38 | 
            -
                    from research_tools.scholar_search import GoogleScholarTool
         | 
| 39 |  | 
| 40 | 
             
                    print("✅ All tool imports successful")
         | 
| 41 | 
             
                    return True
         | 
| @@ -108,8 +107,7 @@ def test_individual_tools(): | |
| 108 | 
             
                        'wikipedia': ('artificial intelligence', {}),
         | 
| 109 | 
             
                        'arxiv': ('machine learning', {}),
         | 
| 110 | 
             
                        'github': ('python', {}),
         | 
| 111 | 
            -
                        'sec': ('Apple', {}) | 
| 112 | 
            -
                        'scholar': ('deep learning', {})
         | 
| 113 | 
             
                    }
         | 
| 114 |  | 
| 115 | 
             
                    for tool_name, (query, kwargs) in test_queries.items():
         | 
| @@ -262,7 +260,6 @@ def test_dependency_check(): | |
| 262 | 
             
                    'requests': 'HTTP requests',
         | 
| 263 | 
             
                    'xml.etree.ElementTree': 'XML parsing (built-in)',
         | 
| 264 | 
             
                    'wikipedia': 'Wikipedia search',
         | 
| 265 | 
            -
                    'scholarly': 'Google Scholar (optional)',
         | 
| 266 | 
             
                    'smolagents': 'Web search agents'
         | 
| 267 | 
             
                }
         | 
| 268 |  | 
|  | |
| 35 | 
             
                    from research_tools.arxiv_search import ArxivSearchTool
         | 
| 36 | 
             
                    from research_tools.github_search import GitHubSearchTool
         | 
| 37 | 
             
                    from research_tools.sec_search import SECSearchTool
         | 
|  | |
| 38 |  | 
| 39 | 
             
                    print("✅ All tool imports successful")
         | 
| 40 | 
             
                    return True
         | 
|  | |
| 107 | 
             
                        'wikipedia': ('artificial intelligence', {}),
         | 
| 108 | 
             
                        'arxiv': ('machine learning', {}),
         | 
| 109 | 
             
                        'github': ('python', {}),
         | 
| 110 | 
            +
                        'sec': ('Apple', {})
         | 
|  | |
| 111 | 
             
                    }
         | 
| 112 |  | 
| 113 | 
             
                    for tool_name, (query, kwargs) in test_queries.items():
         | 
|  | |
| 260 | 
             
                    'requests': 'HTTP requests',
         | 
| 261 | 
             
                    'xml.etree.ElementTree': 'XML parsing (built-in)',
         | 
| 262 | 
             
                    'wikipedia': 'Wikipedia search',
         | 
|  | |
| 263 | 
             
                    'smolagents': 'Web search agents'
         | 
| 264 | 
             
                }
         | 
| 265 |  | 

