Shreyas094 committed on
Commit
372531f
·
verified ·
1 Parent(s): ebd154b

Upload 528 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .cursorrules +1 -0
  2. .dockerignore +2 -0
  3. .env +8 -0
  4. .gitattributes +21 -0
  5. .github/ISSUE_TEMPLATE/bug_report.md +38 -0
  6. .github/ISSUE_TEMPLATE/feature_request.md +20 -0
  7. .github/dependabot.yml +15 -0
  8. .github/workflows/docker-build.yml +45 -0
  9. .gitignore +53 -0
  10. CODE_OF_CONDUCT.md +123 -0
  11. CONTRIBUTING.md +42 -0
  12. CURSOR_RULES.md +181 -0
  13. Dockerfile +46 -0
  14. LICENSE +201 -0
  15. Procfile +1 -0
  16. README-ja_JP.md +159 -0
  17. README-ko_KR.md +242 -0
  18. README-zh_CN.md +158 -0
  19. README.md +231 -11
  20. __pycache__/main.cpython-312.pyc +0 -0
  21. backend/__init__.py +1 -0
  22. backend/__pycache__/__init__.cpython-312.pyc +0 -0
  23. backend/__pycache__/utils.cpython-312.pyc +0 -0
  24. backend/chat/__init__.py +1 -0
  25. backend/chat/__pycache__/__init__.cpython-312.pyc +0 -0
  26. backend/chat/__pycache__/chat.cpython-312.pyc +0 -0
  27. backend/chat/chat.py +106 -0
  28. backend/memory/__init__.py +0 -0
  29. backend/memory/draft.py +10 -0
  30. backend/memory/research.py +20 -0
  31. backend/report_type/__init__.py +7 -0
  32. backend/report_type/__pycache__/__init__.cpython-312.pyc +0 -0
  33. backend/report_type/basic_report/__init__.py +0 -0
  34. backend/report_type/basic_report/__pycache__/__init__.cpython-312.pyc +0 -0
  35. backend/report_type/basic_report/__pycache__/basic_report.cpython-312.pyc +0 -0
  36. backend/report_type/basic_report/basic_report.py +46 -0
  37. backend/report_type/detailed_report/README.md +12 -0
  38. backend/report_type/detailed_report/__init__.py +0 -0
  39. backend/report_type/detailed_report/__pycache__/__init__.cpython-312.pyc +0 -0
  40. backend/report_type/detailed_report/__pycache__/detailed_report.cpython-312.pyc +0 -0
  41. backend/report_type/detailed_report/detailed_report.py +139 -0
  42. backend/server/__init__.py +0 -0
  43. backend/server/__pycache__/__init__.cpython-312.pyc +0 -0
  44. backend/server/__pycache__/server.cpython-312.pyc +0 -0
  45. backend/server/__pycache__/server_utils.cpython-312.pyc +0 -0
  46. backend/server/__pycache__/websocket_manager.cpython-312.pyc +0 -0
  47. backend/server/app.py +16 -0
  48. backend/server/logging_config.py +83 -0
  49. backend/server/server.py +134 -0
  50. backend/server/server_utils.py +259 -0
.cursorrules ADDED
@@ -0,0 +1 @@
 
 
1
+ # Project Overview This project, named GPT-Researcher, is an LLM-based autonomous agent that conducts local and web research on any topic and generates a comprehensive report with citations. It is built using Next.js and TypeScript, integrating various libraries for their strengths. Your primary goal is to help with Next.js app router patterns, TypeScript type safety, Tailwind CSS best practices, code quality standards, and Python/FastAPI backend optimizations. # Key URLs - Project Home Page: https://gptr.dev/ - GitHub Repository: https://github.com/assafelovic/gpt-researcher - Documentation: https://docs.gptr.dev/ # Project Structure - Frontend user interface built with Next.js, TypeScript, and Tailwind CSS in `/frontend` - Static FastAPI version for lightweight deployments - Next.js version for production use with enhanced features - Multi-agent research system using LangChain and LangGraph in `/backend/multi_agents` - Browser, Editor, Researcher, Reviewer, Revisor, Writer, and Publisher agents - Task configuration and agent coordination - Document processing using Unstructured and PyMuPDF in `/backend/document_processing` - PDF, DOCX, and web content parsing - Text extraction and preprocessing - Report generation using LangChain and Jinja2 templates in `/backend/report_generation` - Template-based report structuring - Dynamic content formatting - Multiple output formats in `/backend/output_formats` - PDF via md2pdf - Markdown via mistune - DOCX via python-docx - Format conversion utilities - Export functionality - GPT Researcher core functionality in `/gpt_researcher` - Web scraping and content aggregation - Research planning and execution - Source validation and tracking - Query processing and response generation - Testing infrastructure in `/tests` - Unit tests for individual components - Integration tests for agent interactions - End-to-end research workflow tests - Mock data and fixtures for testing # Language Model Configuration - Default model: gpt-4-turbo - 
Alternative models: gpt-3.5-turbo, claude-3-opus - Temperature settings for different tasks - Context window management - Token limit handling - Cost optimization strategies # Error Handling - Research failure recovery - API rate limiting - Network timeout handling - Invalid input management - Source validation errors - Report generation failures # Performance - Parallel processing strategies - Caching mechanisms - Memory management - Response streaming - Resource allocation - Query optimization # Development Workflow - Branch naming conventions - Commit message format - PR review process - Testing requirements - Documentation updates - Version control guidelines # API Documentation - REST endpoints - WebSocket events - Request/Response formats - Authentication methods - Rate limits - Error codes # Monitoring - Performance metrics - Error tracking - Usage statistics - Cost monitoring - Research quality metrics - User feedback tracking # Frontend Components - Static FastAPI version for lightweight deployments - Next.js version for production use with enhanced features # Backend Components - Multi-agent system architecture - Document processing pipeline - Report generation system - Output format handlers # Core Research Components - Web scraping and aggregation - Research planning and execution - Source validation - Query processing # Testing - Unit tests - Integration tests - End-to-end tests - Performance testing # Rule Violation Monitoring - Alert developer when changes conflict with project structure - Warn about deviations from coding standards - Flag unauthorized framework or library additions - Monitor for security and performance anti-patterns - Track API usage patterns that may violate guidelines - Report TypeScript strict mode violations - Identify accessibility compliance issues # Development Guidelines - Use TypeScript with strict mode enabled - Follow ESLint and Prettier configurations - Ensure components are responsive and accessible - Use Tailwind CSS 
for styling, following the project's design system - Minimize AI-generated comments, prefer self-documenting code - Follow React best practices and hooks guidelines - Validate all user inputs and API responses - Use existing components as reference implementations # Important Scripts - `npm run dev`: Start development server - `npm run build`: Build for production - `npm run test`: Run test suite - `python -m pytest`: Run Python tests - `docker-compose up`: Start all services - `docker-compose run gpt-researcher-tests`: Run test suite in container - `python -m uvicorn backend.server.server:app --host=0.0.0.0 --port=8000`: Start FastAPI server - `python -m uvicorn backend.server.server:app --reload`: Start FastAPI server with auto-reload for development - `python main.py`: Run the main application directly # AI Integration Guidelines - Prioritize type safety in all AI interactions - Follow LangChain and LangGraph best practices - Implement proper error handling for AI responses - Maintain context window limits - Handle rate limiting and API quotas - Validate AI outputs before processing - Log AI interactions for debugging # Lexicon - **GPT Researcher**: Autonomous research agent system - **Multi-Agent System**: Coordinated AI agents for research tasks - **Research Pipeline**: End-to-end research workflow - **Agent Roles**: Browser, Editor, Researcher, Reviewer, Revisor, Writer, Publisher - **Source Validation**: Verification of research sources - **Report Generation**: Process of creating final research output # Additional Resources - [Next.js Documentation](https://nextjs.org/docs) - [TypeScript Handbook](https://www.typescriptlang.org/docs/) - [Tailwind CSS Documentation](https://tailwindcss.com/docs) - [LangChain Documentation](https://python.langchain.com/docs/) - [FastAPI Documentation](https://fastapi.tiangolo.com/) - [Project Documentation](https://docs.gptr.dev/) End all your comments with a :-) symbol.
.dockerignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .git
2
+ output/
.env ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ GOOGLE_API_KEY=AIzaSyCISHY92IzU60M8Jf0qCWIRCyhGUAj_haU
2
+ FAST_LLM="google_genai:gemini-1.5-flash"
3
+ SMART_LLM="google_genai:gemini-1.5-pro"
4
+ STRATEGIC_LLM="google_genai:gemini-1.5-pro"
5
+
6
+ EMBEDDING="google_genai:models/text-embedding-004"
7
+
8
+ TAVILY_API_KEY=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
.gitattributes CHANGED
@@ -33,3 +33,24 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs/blog/2023-09-22-gpt-researcher/architecture.png filter=lfs diff=lfs merge=lfs -text
37
+ docs/blog/2023-09-22-gpt-researcher/planner.jpeg filter=lfs diff=lfs merge=lfs -text
38
+ docs/blog/2024-05-19-gptr-langgraph/blog-langgraph.jpeg filter=lfs diff=lfs merge=lfs -text
39
+ docs/blog/2024-09-7-hybrid-research/gptr-hybrid.png filter=lfs diff=lfs merge=lfs -text
40
+ docs/docs/gpt-researcher/context/gptr-hybrid.png filter=lfs diff=lfs merge=lfs -text
41
+ docs/static/img/architecture.png filter=lfs diff=lfs merge=lfs -text
42
+ docs/static/img/leaderboard.png filter=lfs diff=lfs merge=lfs -text
43
+ frontend/nextjs/public/img/agents/academicResearchAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
44
+ frontend/nextjs/public/img/agents/businessAnalystAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
45
+ frontend/nextjs/public/img/agents/computerSecurityanalystAvatar.png filter=lfs diff=lfs merge=lfs -text
46
+ frontend/nextjs/public/img/agents/financeAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
47
+ frontend/nextjs/public/img/agents/mathAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
48
+ frontend/nextjs/public/img/agents/travelAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
49
+ frontend/nextjs/public/img/gptr-logo.png filter=lfs diff=lfs merge=lfs -text
50
+ frontend/static/academicResearchAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
51
+ frontend/static/businessAnalystAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
52
+ frontend/static/computerSecurityanalystAvatar.png filter=lfs diff=lfs merge=lfs -text
53
+ frontend/static/financeAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
54
+ frontend/static/mathAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
55
+ frontend/static/travelAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
56
+ tests/docs/doc.pdf filter=lfs diff=lfs merge=lfs -text
.github/ISSUE_TEMPLATE/bug_report.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Bug report
3
+ about: Create a report to help us improve
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the bug**
11
+ A clear and concise description of what the bug is.
12
+
13
+ **To Reproduce**
14
+ Steps to reproduce the behavior:
15
+ 1. Go to '...'
16
+ 2. Click on '....'
17
+ 3. Scroll down to '....'
18
+ 4. See error
19
+
20
+ **Expected behavior**
21
+ A clear and concise description of what you expected to happen.
22
+
23
+ **Screenshots**
24
+ If applicable, add screenshots to help explain your problem.
25
+
26
+ **Desktop (please complete the following information):**
27
+ - OS: [e.g. iOS]
28
+ - Browser [e.g. chrome, safari]
29
+ - Version [e.g. 22]
30
+
31
+ **Smartphone (please complete the following information):**
32
+ - Device: [e.g. iPhone6]
33
+ - OS: [e.g. iOS8.1]
34
+ - Browser [e.g. stock browser, safari]
35
+ - Version [e.g. 22]
36
+
37
+ **Additional context**
38
+ Add any other context about the problem here.
.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Is your feature request related to a problem? Please describe.**
11
+ A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12
+
13
+ **Describe the solution you'd like**
14
+ A clear and concise description of what you want to happen.
15
+
16
+ **Describe alternatives you've considered**
17
+ A clear and concise description of any alternative solutions or features you've considered.
18
+
19
+ **Additional context**
20
+ Add any other context or screenshots about the feature request here.
.github/dependabot.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ - package-ecosystem: "pip" # See documentation for possible values
9
+ directory: "/" # Location of package manifests
10
+ schedule:
11
+ interval: "weekly"
12
+ - package-ecosystem: "docker"
13
+ directory: "/"
14
+ schedule:
15
+ interval: "weekly"
.github/workflows/docker-build.yml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: GPTR tests
2
+ run-name: ${{ github.actor }} ran the GPTR tests flow
3
+ permissions:
4
+ contents: read
5
+ pull-requests: write
6
+ on:
7
+ workflow_dispatch: # Allows the workflow to be triggered manually
8
+ # pull_request:
9
+ # types: [opened, synchronize]
10
+
11
+ jobs:
12
+ docker:
13
+ runs-on: ubuntu-latest
14
+ environment: tests # Specify the environment to use for this job
15
+ env:
16
+ # Ensure these environment variables are set for the entire job
17
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
18
+ TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }}
19
+ LANGCHAIN_API_KEY: ${{ secrets.LANGCHAIN_API_KEY }}
20
+ steps:
21
+ - name: Git checkout
22
+ uses: actions/checkout@v3
23
+
24
+ - name: Set up QEMU
25
+ uses: docker/setup-qemu-action@v2
26
+
27
+ - name: Set up Docker Buildx
28
+ uses: docker/setup-buildx-action@v2
29
+ with:
30
+ driver: docker
31
+
32
+ # - name: Build Docker images
33
+ # uses: docker/build-push-action@v4
34
+ # with:
35
+ # push: false
36
+ # tags: gptresearcher/gpt-researcher:latest
37
+ # file: Dockerfile
38
+
39
+ - name: Set up Docker Compose
40
+ run: |
41
+ sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
42
+ sudo chmod +x /usr/local/bin/docker-compose
43
+ - name: Run tests with Docker Compose
44
+ run: |
45
+ docker-compose --profile test run --rm gpt-researcher-tests
.gitignore ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Ignore env containing secrets
2
+ .env
3
+ .venv
4
+ .envrc
5
+
6
+ #Ignore Virtual Env
7
+ env/
8
+ venv/
9
+ .venv/
10
+
11
+ # Other Environments
12
+ ENV/
13
+ env.bak/
14
+ venv.bak/
15
+
16
+ #Ignore generated outputs
17
+ outputs/
18
+ *.lock
19
+ dist/
20
+ gpt_researcher.egg-info/
21
+
22
+ #Ignore my local docs
23
+ my-docs/
24
+
25
+ #Ignore pycache
26
+ **/__pycache__/
27
+
28
+ #Ignore mypy cache
29
+ .mypy_cache/
30
+ node_modules
31
+ .idea
32
+ .DS_Store
33
+ .docusaurus
34
+ build
35
+ docs/build
36
+
37
+ .vscode/launch.json
38
+ .langgraph-data/
39
+ .next/
40
+ package-lock.json
41
+
42
+ #Vim swp files
43
+ *.swp
44
+
45
+ # Log files
46
+ logs/
47
+ *.orig
48
+ *.log
49
+ server_log.txt
50
+
51
+ #Cursor Rules
52
+ .cursorrules
53
+ CURSOR_RULES.md
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We, as members, contributors, and leaders, pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, sexual identity, or
10
+ orientation.
11
+
12
+ We commit to acting and interacting in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ - Demonstrating empathy and kindness toward others
21
+ - Being respectful of differing opinions, viewpoints, and experiences
22
+ - Giving and gracefully accepting constructive feedback
23
+ - Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
24
+ - Focusing on what is best not just for us as individuals, but for the
25
+ overall community
26
+
27
+ Examples of unacceptable behavior include:
28
+
29
+ - The use of sexualized language or imagery, and sexual attention or
30
+ advances of any kind
31
+ - Trolling, insulting or derogatory comments, and personal or political attacks
32
+ - Public or private harassment
33
+ - Publishing others' private information, such as a physical or email address, without their explicit permission
34
+ - Other conduct that could reasonably be considered inappropriate in a professional setting
35
+
36
+ ## Enforcement Responsibilities
37
+
38
+ Community leaders are responsible for clarifying and enforcing our standards of
39
+ acceptable behavior and will take appropriate and fair corrective action in
40
+ response to any behavior deemed inappropriate, threatening, offensive,
41
+ or harmful.
42
+
43
+ Community leaders have the right and responsibility to remove, edit, or reject
44
+ comments, commits, code, wiki edits, issues, and other contributions that do not
45
+ align with this Code of Conduct, and will communicate reasons for moderation
46
+ decisions when appropriate.
47
+
48
+ ## Scope
49
+
50
+ This Code of Conduct applies to all community spaces and also applies when
51
+ an individual is officially representing the community in public spaces.
52
+ Examples include using an official email address, posting via an official
53
+ social media account, or acting as an appointed representative at an online or offline event.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported to the community leaders responsible for enforcement at
59
60
+ All complaints will be reviewed and investigated promptly and fairly.
61
+
62
+ All community leaders are obligated to respect the privacy and security of the
63
+ reporter of any incident.
64
+
65
+ ## Enforcement Guidelines
66
+
67
+ Community leaders will follow these Community Impact Guidelines in determining
68
+ the consequences for any action they deem in violation of this Code of Conduct:
69
+
70
+ ### 1. Correction
71
+
72
+ **Community Impact**: Use of inappropriate language or other behavior deemed
73
+ unprofessional or unwelcome in the community.
74
+
75
+ **Consequence**: A private, written warning from community leaders, providing
76
+ clarity around the nature of the violation and an explanation of why the
77
+ behavior was inappropriate. A public apology may be requested.
78
+
79
+ ### 2. Warning
80
+
81
+ **Community Impact**: A violation through a single incident or series
82
+ of actions.
83
+
84
+ **Consequence**: A warning with consequences for continued behavior. No
85
+ interaction with the people involved, including unsolicited interaction with
86
+ those enforcing the Code of Conduct, for a specified period. This includes
87
+ avoiding interactions in community spaces and external channels like social media.
88
+ Violating these terms may lead to a temporary or permanent ban.
89
+
90
+ ### 3. Temporary Ban
91
+
92
+ **Community Impact**: A serious violation of community standards, including
93
+ sustained inappropriate behavior.
94
+
95
+ **Consequence**: A temporary ban from any interaction or public
96
+ communication with the community for a specified period. No public or
97
+ private interaction with the people involved, including unsolicited interaction
98
+ with those enforcing the Code of Conduct, is allowed during this period.
99
+ Violating these terms may lead to a permanent ban.
100
+
101
+ ### 4. Permanent Ban
102
+
103
+ **Community Impact**: Demonstrating a pattern of violation of community
104
+ standards, including sustained inappropriate behavior, harassment of an
105
+ individual, or aggression toward or disparagement of groups of individuals.
106
+
107
+ **Consequence**: A permanent ban from any public interaction within
108
+ the community.
109
+
110
+ ## Attribution
111
+
112
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
113
+ version 2.0, available at
114
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
115
+
116
+ Community Impact Guidelines were inspired by [Mozilla's code of conduct
117
+ enforcement ladder](https://github.com/mozilla/diversity).
118
+
119
+ [homepage]: https://www.contributor-covenant.org
120
+
121
+ For answers to common questions about this code of conduct, see the FAQ at
122
+ https://www.contributor-covenant.org/faq. Translations are available at
123
+ https://www.contributor-covenant.org/translations.
CONTRIBUTING.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to GPT Researcher
2
+
3
+ First off, we'd like to welcome you and thank you for your interest and effort in contributing to our open-source project ❤️. Contributions of all forms are welcome—from new features and bug fixes to documentation and more.
4
+
5
+ We are on a mission to build the #1 AI agent for comprehensive, unbiased, and factual research online, and we need your support to achieve this grand vision.
6
+
7
+ Please take a moment to review this document to make the contribution process easy and effective for everyone involved.
8
+
9
+ ## Reporting Issues
10
+
11
+ If you come across any issue or have an idea for an improvement, don't hesitate to create an issue on GitHub. Describe your problem in sufficient detail, providing as much relevant information as possible. This way, we can reproduce the issue before attempting to fix it or respond appropriately.
12
+
13
+ ## Contributing Code
14
+
15
+ 1. **Fork the repository and create your branch from `master`.**
16
+ If it’s not an urgent bug fix, branch from `master` and work on the feature or fix there.
17
+
18
+ 2. **Make your changes.**
19
+ Implement your changes following best practices for coding in the project's language.
20
+
21
+ 3. **Test your changes.**
22
+ Ensure that your changes pass all tests if any exist. If the project doesn’t have automated tests, test your changes manually to confirm they behave as expected.
23
+
24
+ 4. **Follow the coding style.**
25
+ Ensure your code adheres to the coding conventions used throughout the project, including indentation, accurate comments, etc.
26
+
27
+ 5. **Commit your changes.**
28
+ Make your Git commits informative and concise. This is very helpful for others when they look at the Git log.
29
+
30
+ 6. **Push to your fork and submit a pull request.**
31
+ When your work is ready and passes tests, push your branch to your fork of the repository and submit a pull request from there.
32
+
33
+ 7. **Pat yourself on the back and wait for review.**
34
+ Your work is done, congratulations! Now sit tight. The project maintainers will review your submission as soon as possible. They might suggest changes or ask for improvements. Both constructive conversation and patience are key to the collaboration process.
35
+
36
+ ## Documentation
37
+
38
+ If you would like to contribute to the project's documentation, please follow the same steps: fork the repository, make your changes, test them, and submit a pull request.
39
+
40
+ Documentation is a vital part of any software. It's not just about having good code; ensuring that users and contributors understand what's going on, how to use the software, or how to contribute is crucial.
41
+
42
+ We're grateful for all our contributors, and we look forward to building the world's leading AI research agent hand-in-hand with you. Let's harness the power of open source and AI to change the world together!
CURSOR_RULES.md ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ > **Note**: This is a readable copy of the `.cursorrules` file, maintained for legibility. The rules that actually take effect are the ones in the `.cursorrules` file in the root directory.
2
+
3
+ # GPT-Researcher Cursor Rules
4
+
5
+ ## Project Overview
6
+ This project, named GPT-Researcher, is an LLM-based autonomous agent that conducts local and web research on any topic and generates a comprehensive report with citations. It is built using Next.js and TypeScript, integrating various libraries for their strengths.
7
+
8
+ Your primary goal is to help with:
9
+ - Next.js app router patterns
10
+ - TypeScript type safety
11
+ - Tailwind CSS best practices
12
+ - Code quality standards
13
+ - Python/FastAPI backend optimizations
14
+
15
+ ## Key URLs
16
+ - Project Home Page: https://gptr.dev/
17
+ - GitHub Repository: https://github.com/assafelovic/gpt-researcher
18
+ - Documentation: https://docs.gptr.dev/
19
+
20
+ ## Project Structure
21
+ - Frontend user interface built with Next.js, TypeScript, and Tailwind CSS in `/frontend`
22
+ - Static FastAPI version for lightweight deployments
23
+ - Next.js version for production use with enhanced features
24
+
25
+ - Multi-agent research system using LangChain and LangGraph in `/backend/multi_agents`
26
+ - Browser, Editor, Researcher, Reviewer, Revisor, Writer, and Publisher agents
27
+ - Task configuration and agent coordination
28
+
29
+ - Document processing using Unstructured and PyMuPDF in `/backend/document_processing`
30
+ - PDF, DOCX, and web content parsing
31
+ - Text extraction and preprocessing
32
+
33
+ - Report generation using LangChain and Jinja2 templates in `/backend/report_generation`
34
+ - Template-based report structuring
35
+ - Dynamic content formatting
36
+
37
+ - Multiple output formats in `/backend/output_formats`
38
+ - PDF via md2pdf
39
+ - Markdown via mistune
40
+ - DOCX via python-docx
41
+ - Format conversion utilities
42
+ - Export functionality
43
+
44
+ - GPT Researcher core functionality in `/gpt_researcher`
45
+ - Web scraping and content aggregation
46
+ - Research planning and execution
47
+ - Source validation and tracking
48
+ - Query processing and response generation
49
+
50
+ - Testing infrastructure in `/tests`
51
+ - Unit tests for individual components
52
+ - Integration tests for agent interactions
53
+ - End-to-end research workflow tests
54
+ - Mock data and fixtures for testing
55
+
56
+ ## Language Model Configuration
57
+ - Default model: gpt-4-turbo
58
+ - Alternative models: gpt-3.5-turbo, claude-3-opus
59
+ - Temperature settings for different tasks
60
+ - Context window management
61
+ - Token limit handling
62
+ - Cost optimization strategies
63
+
64
+ ## Error Handling
65
+ - Research failure recovery
66
+ - API rate limiting
67
+ - Network timeout handling
68
+ - Invalid input management
69
+ - Source validation errors
70
+ - Report generation failures
71
+
72
+ ## Performance
73
+ - Parallel processing strategies
74
+ - Caching mechanisms
75
+ - Memory management
76
+ - Response streaming
77
+ - Resource allocation
78
+ - Query optimization
79
+
80
+ ## Development Workflow
81
+ - Branch naming conventions
82
+ - Commit message format
83
+ - PR review process
84
+ - Testing requirements
85
+ - Documentation updates
86
+ - Version control guidelines
87
+
88
+ ## API Documentation
89
+ - REST endpoints
90
+ - WebSocket events
91
+ - Request/Response formats
92
+ - Authentication methods
93
+ - Rate limits
94
+ - Error codes
95
+
96
+ ## Monitoring
97
+ - Performance metrics
98
+ - Error tracking
99
+ - Usage statistics
100
+ - Cost monitoring
101
+ - Research quality metrics
102
+ - User feedback tracking
103
+
104
+ ## Frontend Components
105
+ - Static FastAPI version for lightweight deployments
106
+ - Next.js version for production use with enhanced features
107
+
108
+ ## Backend Components
109
+ - Multi-agent system architecture
110
+ - Document processing pipeline
111
+ - Report generation system
112
+ - Output format handlers
113
+
114
+ ## Core Research Components
115
+ - Web scraping and aggregation
116
+ - Research planning and execution
117
+ - Source validation
118
+ - Query processing
119
+
120
+ ## Testing
121
+ - Unit tests
122
+ - Integration tests
123
+ - End-to-end tests
124
+ - Performance testing
125
+
126
+ ## Rule Violation Monitoring
127
+ - Alert developer when changes conflict with project structure
128
+ - Warn about deviations from coding standards
129
+ - Flag unauthorized framework or library additions
130
+ - Monitor for security and performance anti-patterns
131
+ - Track API usage patterns that may violate guidelines
132
+ - Report TypeScript strict mode violations
133
+ - Identify accessibility compliance issues
134
+
135
+ ## Development Guidelines
136
+ - Use TypeScript with strict mode enabled
137
+ - Follow ESLint and Prettier configurations
138
+ - Ensure components are responsive and accessible
139
+ - Use Tailwind CSS for styling, following the project's design system
140
+ - Minimize AI-generated comments, prefer self-documenting code
141
+ - Follow React best practices and hooks guidelines
142
+ - Validate all user inputs and API responses
143
+ - Use existing components as reference implementations
144
+
145
+ ## Important Scripts
146
+ - `npm run dev`: Start development server
147
+ - `npm run build`: Build for production
148
+ - `npm run test`: Run test suite
149
+ - `python -m pytest`: Run Python tests
150
+ - `python -m uvicorn backend.server.server:app --host=0.0.0.0 --port=8000`: Start FastAPI server
151
+ - `python -m uvicorn backend.server.server:app --reload`: Start FastAPI server with auto-reload for development
152
+ - `python main.py`: Run the main application directly
153
+ - `docker-compose up`: Start all services
154
+ - `docker-compose run gpt-researcher-tests`: Run test suite in container
155
+
156
+ ## AI Integration Guidelines
157
+ - Prioritize type safety in all AI interactions
158
+ - Follow LangChain and LangGraph best practices
159
+ - Implement proper error handling for AI responses
160
+ - Maintain context window limits
161
+ - Handle rate limiting and API quotas
162
+ - Validate AI outputs before processing
163
+ - Log AI interactions for debugging
164
+
165
+ ## Lexicon
166
+ - **GPT Researcher**: Autonomous research agent system
167
+ - **Multi-Agent System**: Coordinated AI agents for research tasks
168
+ - **Research Pipeline**: End-to-end research workflow
169
+ - **Agent Roles**: Browser, Editor, Researcher, Reviewer, Revisor, Writer, Publisher
170
+ - **Source Validation**: Verification of research sources
171
+ - **Report Generation**: Process of creating final research output
172
+
173
+ ## Additional Resources
174
+ - [Next.js Documentation](https://nextjs.org/docs)
175
+ - [TypeScript Handbook](https://www.typescriptlang.org/docs/)
176
+ - [Tailwind CSS Documentation](https://tailwindcss.com/docs)
177
+ - [LangChain Documentation](https://python.langchain.com/docs/)
178
+ - [FastAPI Documentation](https://fastapi.tiangolo.com/)
179
+ - [Project Documentation](https://docs.gptr.dev/)
180
+
181
+ _Note: End all your comments with a :-) symbol._
Dockerfile ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Stage 1: Browser and build tools installation
2
+ FROM python:3.11.4-slim-bullseye AS install-browser
3
+
4
+ # Install Chromium, Chromedriver, Firefox, Geckodriver, and build tools in one layer
5
+ RUN apt-get update && \
6
+ apt-get satisfy -y "chromium, chromium-driver (>= 115.0)" && \
7
+ apt-get install -y --no-install-recommends firefox-esr wget build-essential && \
8
+ wget https://github.com/mozilla/geckodriver/releases/download/v0.33.0/geckodriver-v0.33.0-linux64.tar.gz && \
9
+ tar -xvzf geckodriver-v0.33.0-linux64.tar.gz && \
10
+ chmod +x geckodriver && \
11
+ mv geckodriver /usr/local/bin/ && \
12
+ rm geckodriver-v0.33.0-linux64.tar.gz && \
13
+ chromium --version && chromedriver --version && \
14
+ rm -rf /var/lib/apt/lists/* # Clean up apt lists to reduce image size
15
+
16
+ # Stage 2: Python dependencies installation
17
+ FROM install-browser AS gpt-researcher-install
18
+
19
+ ENV PIP_ROOT_USER_ACTION=ignore
20
+ WORKDIR /usr/src/app
21
+
22
+ # Copy only the requirements files first so the dependency-install layer below stays cached until they change
23
+ COPY ./requirements.txt ./requirements.txt
24
+ COPY ./multi_agents/requirements.txt ./multi_agents/requirements.txt
25
+
26
+ RUN pip install --no-cache-dir -r requirements.txt && \
27
+ pip install --no-cache-dir -r multi_agents/requirements.txt
28
+
29
+ # Stage 3: Final stage with non-root user and app
30
+ FROM gpt-researcher-install AS gpt-researcher
31
+
32
+ # Create a non-root user for security
33
+ RUN useradd -ms /bin/bash gpt-researcher && \
34
+ chown -R gpt-researcher:gpt-researcher /usr/src/app
35
+
36
+ USER gpt-researcher
37
+ WORKDIR /usr/src/app
38
+
39
+ # Copy the rest of the application files with proper ownership
40
+ COPY --chown=gpt-researcher:gpt-researcher ./ ./
41
+
42
+ # Expose the application's port
43
+ EXPOSE 8000
44
+
45
+ # Define the default command to run the application
46
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: python -m uvicorn backend.server.server:app --host=0.0.0.0 --port=${PORT}
README-ja_JP.md ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <!--<h1 style="display: flex; align-items: center; gap: 10px;">
3
+ <img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/a45bac7c-092c-42e5-8eb6-69acbf20dde5" alt="Logo" width="25">
4
+ GPT Researcher
5
+ </h1>-->
6
+ <img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
7
+
8
+
9
+ ####
10
+
11
+ [![公式サイト](https://img.shields.io/badge/公式サイト-gptr.dev-blue?style=for-the-badge&logo=world&logoColor=white)](https://gptr.dev)
12
+ [![Documentation](https://img.shields.io/badge/Documentation-DOCS-f472b6?logo=googledocs&logoColor=white&style=for-the-badge)](https://docs.gptr.dev)
13
+ [![Discord Follow](https://img.shields.io/discord/1127851779011391548?style=for-the-badge&logo=discord&label=Chat%20on%20Discord)](https://discord.gg/QgZXvJAccX)
14
+
15
+ [![PyPI version](https://img.shields.io/pypi/v/gpt-researcher?logo=pypi&logoColor=white&style=flat)](https://badge.fury.io/py/gpt-researcher)
16
+ ![GitHub Release](https://img.shields.io/github/v/release/assafelovic/gpt-researcher?style=flat&logo=github)
17
+ [![Open In Colab](https://img.shields.io/static/v1?message=Open%20in%20Colab&logo=googlecolab&labelColor=grey&color=yellow&label=%20&style=flat&logoSize=40)](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
18
+ [![Docker Image Version](https://img.shields.io/docker/v/elestio/gpt-researcher/latest?arch=amd64&style=flat&logo=docker&logoColor=white&color=1D63ED)](https://hub.docker.com/r/gptresearcher/gpt-researcher)
19
+ [![Twitter Follow](https://img.shields.io/twitter/follow/assaf_elovic?style=social)](https://twitter.com/assaf_elovic)
20
+
21
+ [English](README.md) |
22
+ [中文](README-zh_CN.md) |
23
+ [日本語](README-ja_JP.md) |
24
+ [한국어](README-ko_KR.md)
25
+ </div>
26
+
27
+ # 🔎 GPT Researcher
28
+
29
+ **GPT Researcher は、さまざまなタスクに対する包括的なオンラインリサーチのために設計された自律エージェントです。**
30
+
31
+ このエージェントは、詳細で事実に基づいた偏りのない研究レポートを生成することができ、関連するリソース、アウトライン、およびレッスンに焦点を当てるためのカスタマイズオプションを提供します。最近の [Plan-and-Solve](https://arxiv.org/abs/2305.04091) および [RAG](https://arxiv.org/abs/2005.11401) 論文に触発され、GPT Researcher は速度、決定論、および信頼性の問題に対処し、同期操作ではなく並列化されたエージェント作業を通じてより安定したパフォーマンスと高速化を提供します。
32
+
33
+ **私たちの使命は、AIの力を活用して、個人や組織に正確で偏りのない事実に基づいた情報を提供することです。**
34
+
35
+ ## なぜGPT Researcherなのか?
36
+
37
+ - 手動の研究タスクで客観的な結論を形成するには時間がかかることがあり、適切なリソースと情報を見つけるのに数週間かかることもあります。
38
+ - 現在のLLMは過去の情報に基づいて訓練されており、幻覚のリスクが高く、研究タスクにはほとんど役に立ちません。
39
+ - 現在のLLMは短いトークン出力に制限されており、長く詳細な研究レポート(2,000語以上)には不十分です。
40
+ - Web検索を可能にするサービス(ChatGPT + Webプラグインなど)は、限られたリソースとコンテンツのみを考慮し、場合によっては表面的で偏った回答をもたらします。
41
+ - Webソースの選択のみを使用すると、研究タスクの正しい結論を導く際にバイアスが生じる可能性があります。
42
+
43
+ ## アーキテクチャ
44
+ 主なアイデアは、「プランナー」と「実行」エージェントを実行することであり、プランナーは研究する質問を生成し、実行エージェントは生成された各研究質問に基づいて最も関連性の高い情報を探します。最後に、プランナーはすべての関連情報をフィルタリングおよび集約し、研究レポートを作成します。<br /> <br />
45
+ エージェントは、研究タスクを完了するために gpt-4o-mini と gpt-4o(128K コンテキスト)の両方を活用します。必要に応じてそれぞれを使用することでコストを最適化します。**平均的な研究タスクは完了するのに約3分かかり、コストは約0.1ドルです**。
46
+
47
+ <div align="center">
48
+ <img align="center" height="500" src="https://cowriter-images.s3.amazonaws.com/architecture.png">
49
+ </div>
50
+
51
+
52
+ 詳細説明:
53
+ * 研究クエリまたはタスクに基づいて特定のドメインエージェントを作成します。
54
+ * 研究タスクに対する客観的な意見を形成する一連の研究質問を生成します。
55
+ * 各研究質問に対して、与えられたタスクに関連する情報をオンラインリソースから収集するクローラーエージェントをトリガーします。
56
+ * 各収集されたリソースについて、関連情報に基づいて要約し、そのソースを追跡します。
57
+ * 最後に、すべての要約されたソースをフィルタリングおよび集約し、最終的な研究レポートを生成します。
58
+
59
+ ## デモ
60
+ https://github.com/assafelovic/gpt-researcher/assets/13554167/a00c89a6-a295-4dd0-b58d-098a31c40fda
61
+
62
+ ## チュートリアル
63
+ - [動作原理](https://docs.gptr.dev/blog/building-gpt-researcher)
64
+ - [インストール方法](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
65
+ - [ライブデモ](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
66
+
67
+ ## 特徴
68
+ - 📝 研究、アウトライン、リソース、レッスンレポートを生成
69
+ - 🌐 各研究で20以上のWebソースを集約し、客観的で事実に基づいた結論を形成
70
+ - 🖥️ 使いやすいWebインターフェース(HTML/CSS/JS)を含む
71
+ - 🔍 JavaScriptサポート付きのWebソースをスクレイピング
72
+ - 📂 訪問および使用されたWebソースのコンテキストを追跡
73
+ - 📄 研究レポートをPDF、Wordなどにエクスポート
74
+
75
+ ## 📖 ドキュメント
76
+
77
+ 完全なドキュメントについては、[こちら](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started)を参照してください:
78
+
79
+ - 入門(インストール、環境設定、簡単な例)
80
+ - 操作例(デモ、統合、dockerサポート)
81
+ - 参考資料(API完全ドキュメント)
82
+ - Tavilyアプリケーションインターフェースの統合(コア概念の高度な説明)
83
+
84
+ ## クイックスタート
85
+ > **ステップ 0** - Python 3.11 以降をインストールします。[こちら](https://www.tutorialsteacher.com/python/install-python)を参照して、ステップバイステップのガイドを確認してください。
86
+
87
+ <br />
88
+
89
+ > **ステップ 1** - プロジェクトをダウンロードします
90
+
91
+ ```bash
92
+ $ git clone https://github.com/assafelovic/gpt-researcher.git
93
+ $ cd gpt-researcher
94
+ ```
95
+
96
+ <br />
97
+
98
+ > **ステップ 2** - 依存関係をインストールします
99
+ ```bash
100
+ $ pip install -r requirements.txt
101
+ ```
102
+ <br />
103
+
104
+ > **ステップ 3** - OpenAI キーと Tavily API キーを使用して .env ファイルを作成するか、直接エクスポートします
105
+
106
+ ```bash
107
+ $ export OPENAI_API_KEY={Your OpenAI API Key here}
108
+ ```
109
+ ```bash
110
+ $ export TAVILY_API_KEY={Your Tavily API Key here}
111
+ ```
112
+
113
+ - **LLMには、[OpenAI GPT](https://platform.openai.com/docs/guides/gpt) を使用することをお勧めします**が、[Langchain Adapter](https://python.langchain.com/docs/guides/adapters/openai) がサポートする他の LLM モデル(オープンソースを含む)を使用することもできます。llm モデルとプロバイダーを config/config.py で変更するだけです。[このガイド](https://python.langchain.com/docs/integrations/llms/) に従って、LLM を Langchain と統合する方法を学んでください。
114
+ - **検索エンジンには、[Tavily Search API](https://app.tavily.com)(LLM 用に最適化されています)を使用することをお勧めします**が、他の検索エンジンを選択することもできます。config/config.py で検索プロバイダーを「duckduckgo」、「googleAPI」、「googleSerp」、「searchapi」、「searx」に変更するだけです。次に、config.py ファイルに対応する env API キーを追加します。
115
+ - **最適なパフォーマンスを得るために、[OpenAI GPT](https://platform.openai.com/docs/guides/gpt) モデルと [Tavily Search API](https://app.tavily.com) を使用することを強くお勧めします。**
116
+ <br />
117
+
118
+ > **ステップ 4** - FastAPI を使用してエージェントを実行します
119
+
120
+ ```bash
121
+ $ uvicorn main:app --reload
122
+ ```
123
+ <br />
124
+
125
+ > **ステップ 5** - 任意のブラウザで http://localhost:8000 にアクセスして、リサーチを楽しんでください!
126
+
127
+ Docker の使い方や機能とサービスの詳細については、[ドキュメント](https://docs.gptr.dev) ページをご覧ください。
128
+
129
+ ## 🚀 貢献
130
+ 私たちは貢献を大歓迎します!興味がある場合は、[貢献](CONTRIBUTING.md) をご覧ください。
131
+
132
+ 私たちの[ロードマップ](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) ページを確認し、私たちの使命に参加することに興味がある場合は、[Discord コミュニティ](https://discord.gg/QgZXvJAccX) を通じてお問い合わせください。
133
+
134
+ ## ✉️ サポート / お問い合わせ
135
+ - [コミュニティディスカッション](https://discord.gg/spBgZmm3Xe)
136
+ - 私たちのメール: [email protected]
137
+
138
+ ## 🛡 免責事項
139
+
140
+ このプロジェクト「GPT Researcher」は実験的なアプリケーションであり、明示または黙示のいかなる保証もなく「現状のまま」提供されます。私たちは学術目的のためにApache 2ライセンスの下でコードを共有しています。ここに記載されている内容は学術的なアドバイスではなく、学術論文や研究論文での使用を推奨するものではありません。
141
+
142
+ 私たちの客観的な研究主張に対する見解:
143
+ 1. 私たちのスクレイピングシステムの主な目的は、不正確な事実を減らすことです。どうやって解決するのか?私たちがスクレイピングするサイトが多ければ多いほど、誤ったデータの可能性は低くなります。各研究で20の情報を収集し、それらがすべて間違っている可能性は非常に低いです。
144
+ 2. 私たちの目標はバイアスを排除することではなく、可能な限りバイアスを減らすことです。**私たちはここでコミュニティとして最も効果的な人間と機械の相互作用を探求しています**。
145
+ 3. 研究プロセスでは、人々も自分が研究しているトピックに対してすでに意見を持っているため、バイアスがかかりやすいです。このツールは多くの意見を収集し、偏った人が決して読まないであろう多様な見解を均等に説明します。
146
+
147
+ **GPT-4 言語モデルの使用は、トークンの使用により高額な費用がかかる可能性があることに注意してください**。このプロジェクトを利用することで、トークンの使用状況と関連する費用を監視および管理する責任があることを認めたことになります。OpenAI API の使用状況を定期的に確認し、予期しない料金が発生しないように必要な制限やアラートを設定することを強くお勧めします。
148
+
149
+ ---
150
+
151
+ <p align="center">
152
+ <a href="https://star-history.com/#assafelovic/gpt-researcher">
153
+ <picture>
154
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
155
+ <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
156
+ <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
157
+ </picture>
158
+ </a>
159
+ </p>
README-ko_KR.md ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <!--<h1 style="display: flex; align-items: center; gap: 10px;">
3
+ <img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/a45bac7c-092c-42e5-8eb6-69acbf20dde5" alt="Logo" width="25">
4
+ GPT Researcher
5
+ </h1>-->
6
+ <img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
7
+
8
+
9
+ ####
10
+
11
+ [![Website](https://img.shields.io/badge/Official%20Website-gptr.dev-teal?style=for-the-badge&logo=world&logoColor=white&color=0891b2)](https://gptr.dev)
12
+ [![Documentation](https://img.shields.io/badge/Documentation-DOCS-f472b6?logo=googledocs&logoColor=white&style=for-the-badge)](https://docs.gptr.dev)
13
+ [![Discord Follow](https://img.shields.io/discord/1127851779011391548?style=for-the-badge&logo=discord&label=Chat%20on%20Discord)](https://discord.gg/QgZXvJAccX)
14
+
15
+ [![PyPI version](https://img.shields.io/pypi/v/gpt-researcher?logo=pypi&logoColor=white&style=flat)](https://badge.fury.io/py/gpt-researcher)
16
+ ![GitHub Release](https://img.shields.io/github/v/release/assafelovic/gpt-researcher?style=flat&logo=github)
17
+ [![Open In Colab](https://img.shields.io/static/v1?message=Open%20in%20Colab&logo=googlecolab&labelColor=grey&color=yellow&label=%20&style=flat&logoSize=40)](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
18
+ [![Docker Image Version](https://img.shields.io/docker/v/elestio/gpt-researcher/latest?arch=amd64&style=flat&logo=docker&logoColor=white&color=1D63ED)](https://hub.docker.com/r/gptresearcher/gpt-researcher)
19
+ [![Twitter Follow](https://img.shields.io/twitter/follow/assaf_elovic?style=social)](https://twitter.com/assaf_elovic)
20
+
21
+ [English](README.md) |
22
+ [中文](README-zh_CN.md) |
23
+ [日本語](README-ja_JP.md) |
24
+ [한국어](README-ko_KR.md)
25
+ </div>
26
+
27
+ # 🔎 GPT Researcher
28
+
29
+ **GPT Researcher는 다양한 작업에 대해 포괄적인 온라인 연구를 수행하도록 설계된 자율 에이전트입니다.**
30
+
31
+ 이 에이전트는 세부적이고 사실에 기반하며 편견 없는 연구 보고서를 생성할 수 있으며, 관련 리소스와 개요에 초점을 맞춘 맞춤형 옵션을 제공합니다. 최근 발표된 [Plan-and-Solve](https://arxiv.org/abs/2305.04091) 및 [RAG](https://arxiv.org/abs/2005.11401) 논문에서 영감을 받아 GPT Researcher는 잘못된 정보, 속도, 결정론적 접근 방식, 신뢰성 문제를 해결하고, 동기화 작업이 아닌 병렬 에이전트 작업을 통해 더 안정적이고 빠른 성능을 제공합니다.
32
+
33
+ **우리의 목표는 AI의 힘을 활용하여 개인과 조직에게 정확하고 편향 없는 사실에 기반한 정보를 제공하는 것입니다.**
34
+
35
+ ## 왜 GPT Researcher인가?
36
+
37
+ - 직접 수행하는 연구 과정은 객관적인 결론을 도출하는 데 시간이 오래 걸리며, 적절한 리소스와 정보를 찾는 데 몇 주가 걸릴 수 있습니다.
38
+ - 현재의 대규모 언어 모델(LLM)은 과거 정보에 기반해 훈련되었으며, 환각 현상이 발생할 위험이 높아 연구 작업에는 적합하지 않습니다.
39
+ - 현재 LLM은 짧은 토큰 출력으로 제한되며, 2,000단어 이상의 길고 자세한 연구 보고서를 작성하는 데는 충분하지 않습니다.
40
+ - 웹 검색을 지원하는 서비스(예: ChatGPT 또는 Perplexity)는 제한된 리소스와 콘텐츠만을 고려하여 경우에 따라 피상적이고 편향된 답변을 제공합니다.
41
+ - 웹 소스만을 사용하면 연구 작업에서 올바른 결론을 도출할 때 편향이 발생할 수 있습니다.
42
+
43
+ ## 데모
44
+ https://github.com/user-attachments/assets/092e9e71-7e27-475d-8c4f-9dddd28934a3
45
+
46
+ ## 아키텍처
47
+ 주요 아이디어는 "플래너"와 "실행" 에이전트를 실행하는 것으로, 플래너는 연구할 질문을 생성하고, 실행 에이전트는 생성된 각 연구 질문에 따라 가장 관련성 높은 정보를 찾습니다. 마지막으로 플래너는 모든 관련 정보를 필터링하고 집계하여 연구 보고서를 작성합니다.
48
+ <br /> <br />
49
+ 에이전트는 `gpt-4o-mini`와 `gpt-4o`(128K 컨텍스트)를 활용하여 연구 작업을 완료합니다. 필요에 따라 각각을 사용하여 비용을 최적화합니다. **평균 연구 작업은 약 2분이 소요되며, 비용은 약 $0.005입니다.**
50
+
51
+ <div align="center">
52
+ <img align="center" height="600" src="https://github.com/assafelovic/gpt-researcher/assets/13554167/4ac896fd-63ab-4b77-9688-ff62aafcc527">
53
+ </div>
54
+
55
+ 구체적으로:
56
+ * 연구 쿼리 또는 작업을 기반으로 도메인별 에이전트를 생성합니다.
57
+ * 주어진 작업에 대해 객관적인 의견을 형성할 수 있는 일련의 연구 질문을 생성합니다.
58
+ * 각 연구 질문에 대해 크롤러 에이전트를 실행하여 작업과 관련된 정보를 온라인 리소스에서 수집합니다.
59
+ * 수집된 각 리소스에서 관련 정보를 요약하고 출처를 기록합니다.
60
+ * 마지막으로, 요약된 모든 정보를 필터링하고 집계하여 최종 연구 보고서를 생성합니다.
61
+
62
+ ## 튜토리얼
63
+ - [동작원리](https://docs.gptr.dev/blog/building-gpt-researcher)
64
+ - [설치방법](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
65
+ - [라이브 데모](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
66
+
67
+
68
+ ## 기능
69
+ - 📝 로컬 문서 및 웹 소스를 사용하여 연구, 개요, 리소스 및 학습 보고서 생성
70
+ - 📜 2,000단어 이상의 길고 상세한 연구 보고서 생성 가능
71
+ - 🌐 연구당 20개 이상의 웹 소스를 집계하여 객관적이고 사실에 기반한 결론 도출
72
+ - 🖥️ 경량 HTML/CSS/JS와 프로덕션용 (NextJS + Tailwind) UX/UI 포함
73
+ - 🔍 자바스크립트 지원 웹 소스 스크래핑 기능
74
+ - 📂 연구 과정에서 맥락과 메모리 추적 및 유지
75
+ - 📄 연구 보고서를 PDF, Word 등으로 내보내기 지원
76
+
77
+ ## 📖 문서
78
+
79
+ 전체 문서(설치, 환경 설정, 간단한 예시)를 보려면 [여기](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started)를 참조하세요.
80
+
81
+ - 시작하기 (설치, 환경 설정, 간단한 예시)
82
+ - 맞춤 설정 및 구성
83
+ - 사용 방법 예시 (데모, 통합, 도커 지원)
84
+ - 참고자료 (전체 API 문서)
85
+
86
+ ## ⚙️ 시작하기
87
+ ### 설치
88
+ > **1단계** - Python 3.11 또는 그 이상의 버전을 설치하세요. [여기](https://www.tutorialsteacher.com/python/install-python)를 참조하여 단계별 가이드를 확인하세요.
89
+
90
+ > **2단계** - 프로젝트를 다운로드하고 해당 디렉토리로 이동하세요.
91
+
92
+ ```bash
93
+ git clone https://github.com/assafelovic/gpt-researcher.git
94
+ cd gpt-researcher
95
+ ```
96
+
97
+ > **3단계** - 두 가지 방법으로 API 키를 설정하세요: 직접 export하거나 `.env` 파일에 저장하세요.
98
+
99
+ Linux/Windows에서 임시 설정을 하려면 export 방법을 사용하세요:
100
+
101
+ ```bash
102
+ export OPENAI_API_KEY={OpenAI API 키 입력}
103
+ export TAVILY_API_KEY={Tavily API 키 입력}
104
+ ```
105
+
106
+ 더 영구적인 설정을 원한다면, 현재의 `gpt-researcher` 디렉토리에 `.env` 파일을 생성하고 환경 변수를 입력하세요 (export 없이).
107
+
108
+ - 기본 LLM은 [GPT](https://platform.openai.com/docs/guides/gpt)이지만, `claude`, `ollama3`, `gemini`, `mistral` 등 다른 LLM도 사용할 수 있습니다. LLM 제공자를 변경하는 방법은 [LLMs 문서](https://docs.gptr.dev/docs/gpt-researcher/llms/llms)를 참조하세요. 이 프로젝트는 OpenAI GPT 모델에 최적화되어 있습니다.
109
+ - 기본 검색기는 [Tavily](https://app.tavily.com)이지만, `duckduckgo`, `google`, `bing`, `searchapi`, `serper`, `searx`, `arxiv`, `exa` 등의 검색기를 사용할 수 있습니다. 검색 제공자를 변경하는 방법은 [검색기 문서](https://docs.gptr.dev/docs/gpt-researcher/retrievers)를 참조하세요.
110
+
111
+ ### 빠른 시작
112
+
113
+ > **1단계** - 필요한 종속성 설치
114
+
115
+ ```bash
116
+ pip install -r requirements.txt
117
+ ```
118
+
119
+ > **2단계** - FastAPI로 에이전트 실행
120
+
121
+ ```bash
122
+ python -m uvicorn main:app --reload
123
+ ```
124
+
125
+ > **3단계** - 브라우저에서 http://localhost:8000 으로 이동하여 연구를 시작하세요!
126
+
127
+ <br />
128
+
129
+ **[Poetry](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started#poetry) 또는 [가상 환경](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started#virtual-environment)에 대해 배우고 싶다면, [문서](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started)를 참조하세요.**
130
+
131
+ ### PIP 패키지로 실행하기
132
+ ```bash
133
+ pip install gpt-researcher
134
+ ```
135
+
136
+ ```python
137
+ ...
138
+ from gpt_researcher import GPTResearcher
139
+
140
+ query = "왜 Nvidia 주식이 오르고 있나요?"
141
+ researcher = GPTResearcher(query=query, report_type="research_report")
142
+ # 주어진 질문에 대한 연구 수행
143
+ research_result = await researcher.conduct_research()
144
+ # 보고서 작성
145
+ report = await researcher.write_report()
146
+ ...
147
+ ```
148
+
149
+ **더 많은 예제와 구성 옵션은 [PIP 문서](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package)를 참조하세요.**
150
+
151
+ ## Docker로 실행
152
+
153
+ > **1단계** - [Docker 설치](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started-with-docker)
154
+
155
+ > **2단계** - `.env.example` 파일을 복사하고 API 키를 추가한 후, 파일을 `.env`로 저장하세요.
156
+
157
+ > **3단계** - docker-compose 파일에서 실행하고 싶지 않은 서비스를 주석 처리하세요.
158
+
159
+ ```bash
160
+ $ docker-compose up --build
161
+ ```
162
+
163
+ > **4단계** - docker-compose 파일에서 아무 것도 주석 처리하지 않았다면, 기본적으로 두 가지 프로세스가 시작됩니다:
164
+ - localhost:8000에서 실행 중인 Python 서버<br>
165
+ - localhost:3000에서 실행 중인 React 앱<br>
166
+
167
+ 브라우저에서 localhost:3000으로 이동하여 연구를 시작하세요!
168
+
169
+ ## 📄 로컬 문서로 연구하기
170
+
171
+ GPT Researcher를 사용하여 로컬 문서를 기반으로 연구 작업을 수행할 수 있습니다. 현재 지원되는 파일 형식은 PDF, 일반 텍스트, CSV, Excel, Markdown, PowerPoint, Word 문서입니다.
172
+
173
+ 1단계: `DOC_PATH` 환경 변수를 설정하여 문서가 있는 폴더를 지정하세요.
174
+
175
+ ```bash
176
+ export DOC_PATH="./my-docs"
177
+ ```
178
+
179
+ 2단계:
180
+ - 프론트엔드 앱을 localhost:8000에서 실행 중이라면, "Report Source" 드롭다운 옵션에서 "My Documents"를 선택하세요.
181
+ - GPT Researcher를 [PIP 패키지](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package)로 실행 중이라면, `report_source` 인수를 "local"로 설정하여 `GPTResearcher` 클래스를 인스턴스화하세요. [코드 예제](https://docs.gptr.dev/docs/gpt-researcher/context/tailored-research)를 참조하세요.
182
+
183
+ ## 👪 다중 에이전트 어시스턴트
184
+
185
+ AI가 프롬프트 엔지니어링 및 RAG에서 다중 에이전트 시스템으로 발전함에 따라, 우리는 [LangGraph](https://python.langchain.com/v0.1/docs/langgraph/)로 구축된 새로운 다중 에이전트 어시스턴트를 소개합니다.
186
+
187
+ LangGraph를 사용하면 여러 에이전트의 전문 기술을 활용하여 연구 과정의 깊이와 질을 크게 향상시킬 수 있습니다. 최근 [STORM](https://arxiv.org/abs/2402.14207) 논문에서 영감을 받아, 이 프로젝트는 AI 에이전트 팀이 주제에 대한 연구를 계획에서 출판까지 함께 수행하는 방법을 보여줍니다.
188
+
189
+ 평균 실행은 5-6 페이지 분량의 연구 보고서를 PDF, Docx, Markdown 형식으로 생성합니다.
190
+
191
+ [여기](https://github.com/assafelovic/gpt-researcher/tree/master/multi_agents)에서 확인하거나 [문서](https://docs.gptr.dev/docs/gpt-researcher/multi_agents/langgraph)에서 자세한 내용을 참조하세요.
192
+
193
+ ## 🖥️ 프론트엔드 애플리케이션
194
+
195
+ GPT-Researcher는 사용자 경험을 개선하고 연구 프로세스를 간소화하기 위해 향상된 프론트엔드를 제공합니다. 프론트엔드는 다음과 같은 기능을 제공합니다:
196
+
197
+ - 연구 쿼리를 입력할 수 있는 직관적인 인터페이스
198
+ - 연구 작업의 실시간 진행 상황 추적
199
+ - 연구 결과의 대화형 디스플레이
200
+ - 맞춤형 연구 경험을 위한 설정 가능
201
+
202
+ 두 가지 배포 옵션이 있습니다:
203
+ 1. FastAPI로 제공되는 경량 정적 프론트엔드
204
+ 2. 고급 기능을 제공하는 NextJS 애플리케이션
205
+
206
+ 프론트엔드 기능에 대한 자세한 설치 방법 및 정보를 원하시면 [문서 페이지](https://docs.gptr.dev/docs/gpt-researcher/frontend/frontend)를 참조하세요.
207
+
208
+ ## 🚀 기여하기
209
+ 우리는 기여를 적극 환영합니다! 관심이 있다면 [기여 가이드](https://github.com/assafelovic/gpt-researcher/blob/master/CONTRIBUTING.md)를 확인해 주세요.
210
+
211
+ [로드맵](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) 페이지를 확인하고, 우리 [Discord 커뮤니티](https://discord.gg/QgZXvJAccX)에 가입하여 우리의 목표에 함께 참여해 주세요.
212
+ <a href="https://github.com/assafelovic/gpt-researcher/graphs/contributors">
213
+ <img src="https://contrib.rocks/image?repo=assafelovic/gpt-researcher" />
214
+ </a>
215
+
216
+ ## ✉️ 지원 / 문의
217
+ - [커뮤니티 Discord](https://discord.gg/spBgZmm3Xe)
218
+ - 저자 이메일: [email protected]
219
+
220
+ ## 🛡️ 면책 조항
221
+
222
+ 이 프로젝트인 GPT Researcher는 실험적인 응용 프로그램이며, 명시적이거나 묵시적인 보증 없이 "있는 그대로" 제공됩니다. 우리는 이 코드를 학술적 목적으로 Apache 2 라이선스 하에 공유하고 있습니다. 여기에 있는 것은 학술적 조언이 아니며, 학술 또는 연구 논문에 사용하는 것을 권장하지 않습니다.
223
+
224
+ 편향되지 않은 연구 주장에 대한 우리의 견해:
225
+ 1. GPT Researcher의 주요 목표는 잘못된 정보와 편향된 사실을 줄이는 것입니다. 그 방법은 무엇일까요? 우리는 더 많은 사이트를 스크래핑할수록 잘못된 데이터의 가능성이 줄어든다고 가정합니다. 여러 사이트에서 정보를 스크래핑하고 가장 빈번한 정보를 선택하면, 모든 정보가 틀릴 확률은 매우 낮습니다.
226
+ 2. 우리는 편향을 완전히 제거하려고 하지는 않지만, 가능한 한 줄이는 것을 목표로 합니다. **우리는 인간과 LLM의 가장 효과적인 상호작용을 찾기 위한 커뮤니티입니다.**
227
+ 3. 연구에서 사람들도 이미 자신이 연구하는 주제에 대해 의견을 가지고 있기 때문에 편향되는 경향이 있습니다. 이 도구는 많은 의견을 스크래핑하며, 편향된 사람이라면 결코 읽지 않았을 다양한 견해를 고르게 설명합니다.
228
+
229
+ **GPT-4 모델을 사용할 경우, 토큰 사용량 때문에 비용이 많이 들 수 있습니다.** 이 프로젝트를 사용하는 경우, 자신의 토큰 사용량 및 관련 비용을 모니터링하고 관리하는 것은 본인의 책임입니다. OpenAI API 사용량을 정기적으로 확인하고, 예상치 못한 비용을 방지하기 위해 필요한 한도를 설정하거나 알림을 설정하는 것이 좋습니다.
230
+
231
+
232
+ ---
233
+
234
+ <p align="center">
235
+ <a href="https://star-history.com/#assafelovic/gpt-researcher">
236
+ <picture>
237
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
238
+ <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
239
+ <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
240
+ </picture>
241
+ </a>
242
+ </p>
README-zh_CN.md ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ <!--<h1 style="display: flex; align-items: center; gap: 10px;">
3
+ <img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/a45bac7c-092c-42e5-8eb6-69acbf20dde5" alt="Logo" width="25">
4
+ GPT Researcher
5
+ </h1>-->
6
+ <img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
7
+
8
+
9
+ ####
10
+
11
+ [![Website](https://img.shields.io/badge/Official%20Website-gptr.dev-teal?style=for-the-badge&logo=world&logoColor=white&color=0891b2)](https://gptr.dev)
12
+ [![Documentation](https://img.shields.io/badge/Documentation-DOCS-f472b6?logo=googledocs&logoColor=white&style=for-the-badge)](https://docs.gptr.dev)
13
+ [![Discord Follow](https://img.shields.io/discord/1127851779011391548?style=for-the-badge&logo=discord&label=Chat%20on%20Discord)](https://discord.gg/QgZXvJAccX)
14
+
15
+ [![PyPI version](https://img.shields.io/pypi/v/gpt-researcher?logo=pypi&logoColor=white&style=flat)](https://badge.fury.io/py/gpt-researcher)
16
+ ![GitHub Release](https://img.shields.io/github/v/release/assafelovic/gpt-researcher?style=flat&logo=github)
17
+ [![Open In Colab](https://img.shields.io/static/v1?message=Open%20in%20Colab&logo=googlecolab&labelColor=grey&color=yellow&label=%20&style=flat&logoSize=40)](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
18
+ [![Docker Image Version](https://img.shields.io/docker/v/elestio/gpt-researcher/latest?arch=amd64&style=flat&logo=docker&logoColor=white&color=1D63ED)](https://hub.docker.com/r/gptresearcher/gpt-researcher)
19
+ [![Twitter Follow](https://img.shields.io/twitter/follow/assaf_elovic?style=social)](https://twitter.com/assaf_elovic)
20
+
21
+ [English](README.md) |
22
+ [中文](README-zh_CN.md) |
23
+ [日本語](README-ja_JP.md) |
24
+ [한국어](README-ko_KR.md)
25
+ </div>
26
+
27
+ # 🔎 GPT Researcher
28
+
29
+ **GPT Researcher 是一个智能体代理,专为各种任务的综合在线研究而设计。**
30
+
31
+ 代理可以生成详细、正式且客观的研究报告,并提供自定义选项,专注于相关资源、结构框架和经验报告。受最近发表的[Plan-and-Solve](https://arxiv.org/abs/2305.04091) 和[RAG](https://arxiv.org/abs/2005.11401) 论文的启发,GPT Researcher 解决了速度、确定性和可靠性等问题,通过并行化的代理运行,而不是同步操作,提供了更稳定的性能和更高的速度。
32
+
33
+ **我们的使命是利用人工智能的力量,为个人和组织提供准确、客观和事实的信息。**
34
+
35
+ ## 为什么选择GPT Researcher?
36
+
37
+ - 因为人工研究任务形成客观结论可能需要时间和精力,有时甚至需要数周才能找到正确的资源和信息。
38
+ - 目前的LLM是根据历史和过时的信息进行训练的,存在严重的幻觉风险,因此几乎无法胜任研究任务。
39
+ - 网络搜索的解决方案(例如 ChatGPT + Web 插件)仅考虑有限的资源和内容,在某些情况下会导致肤浅的结论或不客观的答案。
40
+ - 只使用部分资源可能会在确定研究问题或任务的正确结论时产生偏差。
41
+
42
+ ## 架构
43
+ 主要思想是运行“**计划者**”和“**执行**”代理,而**计划者**生成问题进行研究,“**执行**”代理根据每个生成的研究问题寻找最相关的信息。最后,“**计划者**”过滤和聚合所有相关信息并创建研究报告。<br /> <br />
44
+ 代理同时利用 gpt-4o-mini 和 gpt-4o(128K 上下文)来完成一项研究任务。我们仅在必要时使用这两种方法对成本进行优化。**研究任务平均耗时约 3 分钟,成本约为 0.1 美元**。
45
+
46
+ <div align="center">
47
+ <img align="center" height="500" src="https://cowriter-images.s3.amazonaws.com/architecture.png">
48
+ </div>
49
+
50
+
51
+ 详细说明:
52
+ * 根据研究搜索或任务创建特定领域的代理。
53
+ * 生成一组研究问题,这些问题的答案共同形成对给定任务的客观意见。
54
+ * 针对每个研究问题,触发一个爬虫代理,从在线资源中搜索与给定任务相关的信息。
55
+ * 对于每一个抓取的资源,根据相关信息进行汇总,并跟踪其来源。
56
+ * 最后,对所有汇总的资料来源进行过滤和汇总,并生成最终研究报告。
57
+
58
+ ## 演示
59
+ https://github.com/assafelovic/gpt-researcher/assets/13554167/a00c89a6-a295-4dd0-b58d-098a31c40fda
60
+
61
+ ## 教程
62
+ - [运行原理](https://docs.gptr.dev/blog/building-gpt-researcher)
63
+ - [如何安装](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
64
+ - [现场演示](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
65
+
66
+ ## 特性
67
+ - 📝 生成研究问题、大纲、资源和课题报告
68
+ - 🌐 每项研究汇总超过20个网络资源,形成客观和真实的结论
69
+ - 🖥️ 包括易于使用的web界面 (HTML/CSS/JS)
70
+ - 🔍 支持JavaScript网络资源抓取功能
71
+ - 📂 追踪访问过和使用过的网络资源和来源
72
+ - 📄 将研究报告导出为PDF或其他格式...
73
+
74
+ ## 📖 文档
75
+
76
+ 请参阅[此处](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started),了解完整文档:
77
+
78
+ - 入门(安装、设置环境、简单示例)
79
+ - 操作示例(演示、集成、docker 支持)
80
+ - 参考资料(API完整文档)
81
+ - Tavily 应用程序接口集成(核心概念的高级解释)
82
+
83
+ ## 快速开始
84
+ > **步骤 0** - 安装 Python 3.11 或更高版本。[参见此处](https://www.tutorialsteacher.com/python/install-python) 获取详细指南。
85
+
86
+ <br />
87
+
88
+ > **步骤 1** - 下载项目
89
+
90
+ ```bash
91
+ $ git clone https://github.com/assafelovic/gpt-researcher.git
92
+ $ cd gpt-researcher
93
+ ```
94
+
95
+ <br />
96
+
97
+ > **步骤 2** - 安装依赖项
98
+ ```bash
99
+ $ pip install -r requirements.txt
100
+ ```
101
+ <br />
102
+
103
+ > **第 3 步** - 使用 OpenAI 密钥和 Tavily API 密钥创建 .env 文件,或直接导出该文件
104
+
105
+ ```bash
106
+ $ export OPENAI_API_KEY={Your OpenAI API Key here}
107
+ ```
108
+ ```bash
109
+ $ export TAVILY_API_KEY={Your Tavily API Key here}
110
+ ```
111
+
112
+ - **LLM,我们推荐使用 [OpenAI GPT](https://platform.openai.com/docs/guides/gpt)**,但您也可以使用 [Langchain Adapter](https://python.langchain.com/docs/guides/adapters/openai) 支持的任何其他 LLM 模型(包括开源),只需在 config/config.py 中更改 llm 模型和提供者即可。请按照 [这份指南](https://python.langchain.com/docs/integrations/llms/) 学习如何将 LLM 与 Langchain 集成。
113
+ - **对于搜索引擎,我们推荐使用 [Tavily Search API](https://app.tavily.com)(已针对 LLM 进行优化)**,但您也可以选择其他搜索引擎,只需将 config/config.py 中的搜索提供程序更改为 "duckduckgo"、"googleAPI"、"searchapi"、"googleSerp" 或 "searx" 即可。然后在 config.py 文件中添加相应的 env API 密钥。
114
+ - **我们强烈建议使用 [OpenAI GPT](https://platform.openai.com/docs/guides/gpt) 模型和 [Tavily Search API](https://app.tavily.com) 以获得最佳性能。**
115
+ <br />
116
+
117
+ > **第 4 步** - 使用 FastAPI 运行代理
118
+
119
+ ```bash
120
+ $ uvicorn main:app --reload
121
+ ```
122
+ <br />
123
+
124
+ > **第 5 步** - 在任何浏览器上访问 http://localhost:8000,享受研究乐趣!
125
+
126
+ 要了解如何开始使用 Docker 或了解有关功能和服务的更多信息,请访问 [documentation](https://docs.gptr.dev) 页面。
127
+
128
+ ## 🚀 贡献
129
+ 我们非常欢迎您的贡献!如果您感兴趣,请查看 [contributing](CONTRIBUTING.md)。
130
+
131
+ 如果您有兴趣加入我们的任务,请查看我们的 [路线图](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) 页面,并通过我们的 [Discord 社区](https://discord.gg/QgZXvJAccX) 联系我们。
132
+
133
+ ## ✉️ 支持 / 联系我们
134
+ - [社区讨论区](https://discord.gg/spBgZmm3Xe)
135
+ - 我们的邮箱: [email protected]
136
+
137
+ ## 🛡 免责声明
138
+
139
+ 本项目 "GPT Researcher" 是一个实验性应用程序,按 "现状" 提供,不做任何明示或暗示的保证。我们根据 Apache 2 许可分享用于学术目的的代码。本文不提供任何学术建议,也不建议在学术或研究论文中使用。
140
+
141
+ 我们对客观研究主张的看法:
142
+ 1. 我们抓取系统的全部目的是减少不正确的事实。如何解决?我们抓取的网站越多,错误数据的可能性就越小。我们每项研究都会收集20条信息,它们全部错误的可能性极低。
143
+ 2. 我们的目标不是消除偏见,而是尽可能减少偏见。**作为一个社区,我们在这里探索最有效的人机互动**。
144
+ 3. 在研究过程中,人们也容易产生偏见,因为大多数人对自己研究的课题都有自己的看法。这个工具可以搜罗到许多观点,并均匀地解释各种不同的观点,而有偏见的人是绝对读不到这些观点的。
145
+
146
+ **请注意,使用 GPT-4 语言模型可能会因使用令牌而产生高昂费用**。使用本项目即表示您承认有责任监控和管理自己的令牌使用情况及相关费用。强烈建议您定期检查 OpenAI API 的使用情况,并设置任何必要的限制或警报,以防止发生意外费用。
147
+
148
+ ---
149
+
150
+ <p align="center">
151
+ <a href="https://star-history.com/#assafelovic/gpt-researcher">
152
+ <picture>
153
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
154
+ <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
155
+ <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
156
+ </picture>
157
+ </a>
158
+ </p>
README.md CHANGED
@@ -1,11 +1,231 @@
1
- ---
2
- title: GPT Researcher
3
- emoji: 👁
4
- colorFrom: indigo
5
- colorTo: red
6
- sdk: static
7
- pinned: false
8
- license: apache-2.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center" id="top">
2
+
3
+ <img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
4
+
5
+ ####
6
+
7
+ [![Website](https://img.shields.io/badge/Official%20Website-gptr.dev-teal?style=for-the-badge&logo=world&logoColor=white&color=0891b2)](https://gptr.dev)
8
+ [![Documentation](https://img.shields.io/badge/Documentation-DOCS-f472b6?logo=googledocs&logoColor=white&style=for-the-badge)](https://docs.gptr.dev)
9
+ [![Discord Follow](https://dcbadge.vercel.app/api/server/QgZXvJAccX?style=for-the-badge&theme=clean-inverted&?compact=true)](https://discord.gg/QgZXvJAccX)
10
+
11
+ [![PyPI version](https://img.shields.io/pypi/v/gpt-researcher?logo=pypi&logoColor=white&style=flat)](https://badge.fury.io/py/gpt-researcher)
12
+ ![GitHub Release](https://img.shields.io/github/v/release/assafelovic/gpt-researcher?style=flat&logo=github)
13
+ [![Open In Colab](https://img.shields.io/static/v1?message=Open%20in%20Colab&logo=googlecolab&labelColor=grey&color=yellow&label=%20&style=flat&logoSize=40)](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
14
+ [![Docker Image Version](https://img.shields.io/docker/v/elestio/gpt-researcher/latest?arch=amd64&style=flat&logo=docker&logoColor=white&color=1D63ED)](https://hub.docker.com/r/gptresearcher/gpt-researcher)
15
+ [![Twitter Follow](https://img.shields.io/twitter/follow/assaf_elovic?style=social)](https://twitter.com/assaf_elovic)
16
+
17
+ [English](README.md) | [中文](README-zh_CN.md) | [日本語](README-ja_JP.md) | [한국어](README-ko_KR.md)
18
+
19
+ </div>
20
+
21
+ # 🔎 GPT Researcher
22
+
23
+ **GPT Researcher is an autonomous agent designed for comprehensive web and local research on any given task.**
24
+
25
+ The agent produces detailed, factual, and unbiased research reports with citations. GPT Researcher provides a full suite of customization options to create tailor made and domain specific research agents. Inspired by the recent [Plan-and-Solve](https://arxiv.org/abs/2305.04091) and [RAG](https://arxiv.org/abs/2005.11401) papers, GPT Researcher addresses misinformation, speed, determinism, and reliability by offering stable performance and increased speed through parallelized agent work.
26
+
27
+ **Our mission is to empower individuals and organizations with accurate, unbiased, and factual information through AI.**
28
+
29
+ ## Why GPT Researcher?
30
+
31
+ - Objective conclusions for manual research can take weeks, requiring vast resources and time.
32
+ - LLMs trained on outdated information can hallucinate, becoming irrelevant for current research tasks.
33
+ - Current LLMs have token limitations, insufficient for generating long research reports.
34
+ - Limited web sources in existing services lead to misinformation and shallow results.
35
+ - Selective web sources can introduce bias into research tasks.
36
+
37
+ ## Demo
38
+ https://github.com/user-attachments/assets/2cc38f6a-9f66-4644-9e69-a46c40e296d4
39
+
40
+ ## Architecture
41
+
42
+ The core idea is to utilize 'planner' and 'execution' agents. The planner generates research questions, while the execution agents gather relevant information. The publisher then aggregates all findings into a comprehensive report.
43
+
44
+ <div align="center">
45
+ <img align="center" height="600" src="https://github.com/assafelovic/gpt-researcher/assets/13554167/4ac896fd-63ab-4b77-9688-ff62aafcc527">
46
+ </div>
47
+
48
+ Steps:
49
+ * Create a task-specific agent based on a research query.
50
+ * Generate questions that collectively form an objective opinion on the task.
51
+ * Use a crawler agent for gathering information for each question.
52
+ * Summarize and source-track each resource.
53
+ * Filter and aggregate summaries into a final research report.
54
+
55
+ ## Tutorials
56
+ - [How it Works](https://docs.gptr.dev/blog/building-gpt-researcher)
57
+ - [How to Install](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
58
+ - [Live Demo](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
59
+
60
+ ## Features
61
+
62
+ - 📝 Generate detailed research reports using web and local documents.
63
+ - 🖼️ Smart image scraping and filtering for reports.
64
+ - 📜 Generate detailed reports exceeding 2,000 words.
65
+ - 🌐 Aggregate over 20 sources for objective conclusions.
66
+ - 🖥️ Frontend available in lightweight (HTML/CSS/JS) and production-ready (NextJS + Tailwind) versions.
67
+ - 🔍 JavaScript-enabled web scraping.
68
+ - 📂 Maintains memory and context throughout research.
69
+ - 📄 Export reports to PDF, Word, and other formats.
70
+
71
+ ## 📖 Documentation
72
+
73
+ See the [Documentation](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started) for:
74
+ - Installation and setup guides
75
+ - Configuration and customization options
76
+ - How-To examples
77
+ - Full API references
78
+
79
+ ## ⚙️ Getting Started
80
+
81
+ ### Installation
82
+
83
+ 1. Install Python 3.11 or later. [Guide](https://www.tutorialsteacher.com/python/install-python).
84
+ 2. Clone the project and navigate to the directory:
85
+
86
+ ```bash
87
+ git clone https://github.com/assafelovic/gpt-researcher.git
88
+ cd gpt-researcher
89
+ ```
90
+
91
+ 3. Set up API keys by exporting them or storing them in a `.env` file.
92
+
93
+ ```bash
94
+ export OPENAI_API_KEY={Your OpenAI API Key here}
95
+ export TAVILY_API_KEY={Your Tavily API Key here}
96
+ ```
97
+
98
+ 4. Install dependencies and start the server:
99
+
100
+ ```bash
101
+ pip install -r requirements.txt
102
+ python -m uvicorn main:app --reload
103
+ ```
104
+
105
+ Visit [http://localhost:8000](http://localhost:8000) to start.
106
+
107
+ For other setups (e.g., Poetry or virtual environments), check the [Getting Started page](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started).
108
+
109
+ ## Run as PIP package
110
+ ```bash
111
+ pip install gpt-researcher
112
+
113
+ ```
114
+ ### Example Usage:
115
+ ```python
116
+ ...
117
+ from gpt_researcher import GPTResearcher
118
+
119
+ query = "why is Nvidia stock going up?"
120
+ researcher = GPTResearcher(query=query, report_type="research_report")
121
+ # Conduct research on the given query
122
+ research_result = await researcher.conduct_research()
123
+ # Write the report
124
+ report = await researcher.write_report()
125
+ ...
126
+ ```
127
+
128
+ **For more examples and configurations, please refer to the [PIP documentation](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package) page.**
129
+
130
+
131
+ ## Run with Docker
132
+
133
+ > **Step 1** - [Install Docker](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started-with-docker)
134
+
135
+ > **Step 2** - Clone the '.env.example' file, add your API Keys to the cloned file and save the file as '.env'
136
+
137
+ > **Step 3** - Within the docker-compose file comment out services that you don't want to run with Docker.
138
+
139
+ ```bash
140
+ docker-compose up --build
141
+ ```
142
+
143
+ If that doesn't work, try running it without the dash:
144
+ ```bash
145
+ docker compose up --build
146
+ ```
147
+
148
+
149
+ > **Step 4** - By default, if you haven't uncommented anything in your docker-compose file, this flow will start 2 processes:
150
+ - the Python server running on localhost:8000<br>
151
+ - the React app running on localhost:3000<br>
152
+
153
+ Visit localhost:3000 on any browser and enjoy researching!
154
+
155
+
156
+
157
+ ## 📄 Research on Local Documents
158
+
159
+ You can instruct the GPT Researcher to run research tasks based on your local documents. Currently supported file formats are: PDF, plain text, CSV, Excel, Markdown, PowerPoint, and Word documents.
160
+
161
+ Step 1: Add the env variable `DOC_PATH` pointing to the folder where your documents are located.
162
+
163
+ ```bash
164
+ export DOC_PATH="./my-docs"
165
+ ```
166
+
167
+ Step 2:
168
+ - If you're running the frontend app on localhost:8000, simply select "My Documents" from the "Report Source" Dropdown Options.
169
+ - If you're running GPT Researcher with the [PIP package](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package), pass the `report_source` argument as "local" when you instantiate the `GPTResearcher` class ([code sample here](https://docs.gptr.dev/docs/gpt-researcher/context/tailored-research)).
170
+
171
+
172
+ ## 👪 Multi-Agent Assistant
173
+ As AI evolves from prompt engineering and RAG to multi-agent systems, we're excited to introduce our new multi-agent assistant built with [LangGraph](https://python.langchain.com/v0.1/docs/langgraph/).
174
+
175
+ By using LangGraph, the research process can be significantly improved in depth and quality by leveraging multiple agents with specialized skills. Inspired by the recent [STORM](https://arxiv.org/abs/2402.14207) paper, this project showcases how a team of AI agents can work together to conduct research on a given topic, from planning to publication.
176
+
177
+ An average run generates a 5-6 page research report in multiple formats such as PDF, Docx and Markdown.
178
+
179
+ Check it out [here](https://github.com/assafelovic/gpt-researcher/tree/master/multi_agents) or head over to our [documentation](https://docs.gptr.dev/docs/gpt-researcher/multi_agents/langgraph) for more information.
180
+
181
+ ## 🖥️ Frontend Applications
182
+
183
+ GPT-Researcher now features an enhanced frontend to improve the user experience and streamline the research process. The frontend offers:
184
+
185
+ - An intuitive interface for inputting research queries
186
+ - Real-time progress tracking of research tasks
187
+ - Interactive display of research findings
188
+ - Customizable settings for tailored research experiences
189
+
190
+ Two deployment options are available:
191
+ 1. A lightweight static frontend served by FastAPI
192
+ 2. A feature-rich NextJS application for advanced functionality
193
+
194
+ For detailed setup instructions and more information about the frontend features, please visit our [documentation page](https://docs.gptr.dev/docs/gpt-researcher/frontend/frontend).
195
+
196
+ ## 🚀 Contributing
197
+ We highly welcome contributions! Please check out [contributing](https://github.com/assafelovic/gpt-researcher/blob/master/CONTRIBUTING.md) if you're interested.
198
+
199
+ Please check out our [roadmap](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) page and reach out to us via our [Discord community](https://discord.gg/QgZXvJAccX) if you're interested in joining our mission.
200
+ <a href="https://github.com/assafelovic/gpt-researcher/graphs/contributors">
201
+ <img src="https://contrib.rocks/image?repo=assafelovic/gpt-researcher" />
202
+ </a>
203
+ ## ✉️ Support / Contact us
204
+ - [Community Discord](https://discord.gg/spBgZmm3Xe)
205
+ - Author Email: [email protected]
206
+
207
+ ## 🛡 Disclaimer
208
+
209
+ This project, GPT Researcher, is an experimental application and is provided "as-is" without any warranty, express or implied. We are sharing code for academic purposes under the Apache 2 license. Nothing herein is academic advice, and this is NOT a recommendation to use it in academic or research papers.
210
+
211
+ Our view on unbiased research claims:
212
+ 1. The main goal of GPT Researcher is to reduce incorrect and biased facts. How? We assume that the more sites we scrape, the lower the chance of incorrect data. By scraping multiple sites per research task and choosing the most frequent information, the chance that they are all wrong is extremely low.
213
+ 2. We do not aim to eliminate biases; we aim to reduce them as much as possible. **We are here as a community to figure out the most effective human/LLM interactions.**
214
+ 3. In research, people also tend towards biases, as most already have opinions on the topics they research. This tool scrapes many opinions and will evenly explain diverse views that a biased person would never have read.
215
+
216
+ ---
217
+
218
+ <p align="center">
219
+ <a href="https://star-history.com/#assafelovic/gpt-researcher">
220
+ <picture>
221
+ <source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
222
+ <source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
223
+ <img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
224
+ </picture>
225
+ </a>
226
+ </p>
227
+
228
+
229
+ <p align="right">
230
+ <a href="#top">⬆️ Back to Top</a>
231
+ </p>
__pycache__/main.cpython-312.pyc ADDED
Binary file (1.49 kB). View file
 
backend/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from multi_agents import agents
backend/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (202 Bytes). View file
 
backend/__pycache__/utils.cpython-312.pyc ADDED
Binary file (4.01 kB). View file
 
backend/chat/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .chat import ChatAgentWithMemory
backend/chat/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (212 Bytes). View file
 
backend/chat/__pycache__/chat.cpython-312.pyc ADDED
Binary file (5.49 kB). View file
 
backend/chat/chat.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import WebSocket
2
+ import uuid
3
+
4
+ from gpt_researcher.utils.llm import get_llm
5
+ from gpt_researcher.memory import Memory
6
+ from gpt_researcher.config.config import Config
7
+
8
+ from langgraph.prebuilt import create_react_agent
9
+ from langgraph.checkpoint.memory import MemorySaver
10
+
11
+ from langchain_community.vectorstores import InMemoryVectorStore
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain.tools import Tool, tool
14
+
15
class ChatAgentWithMemory:
    """Chat agent that answers follow-up questions about a generated report.

    The report is split into chunks and indexed in a vector store (unless a
    ready-made store is supplied); a LangGraph ReAct agent is then given a
    retrieval tool over that store and a ``MemorySaver`` checkpointer.
    """

    def __init__(
        self,
        report: str,
        config_path,
        headers,
        vector_store=None
    ):
        """Store the inputs and build the agent graph.

        Args:
            report: Full text of the report the chat is about.
            config_path: Path handed to ``Config`` to build ``self.config``.
            headers: Stored on the instance; not otherwise used in this class.
            vector_store: Optional pre-built vector store. When falsy, a new
                in-memory store is created from ``report``.
        """
        self.report = report
        self.headers = headers
        self.config = Config(config_path)
        self.vector_store = vector_store
        self.graph = self.create_agent()

    def create_agent(self):
        """Create React Agent Graph"""
        # NOTE(review): the model settings below come from a fresh default
        # Config(), while llm_kwargs come from self.config (built from
        # config_path). Confirm whether self.config should be used throughout.
        cfg = Config()

        # Retrieve LLM using get_llm with settings from config
        provider = get_llm(
            llm_provider=cfg.smart_llm_provider,
            model=cfg.smart_llm_model,
            temperature=0.35,
            max_tokens=cfg.smart_token_limit,
            **self.config.llm_kwargs
        ).llm

        # The thread id is read by chat() on every call, so it must exist even
        # when the caller supplied a vector store. Previously it was only set
        # in the branch below, and chat() raised AttributeError in that case.
        self.chat_config = {"configurable": {"thread_id": str(uuid.uuid4())}}

        # If vector_store is not initialized, process documents and add to vector_store
        if not self.vector_store:
            documents = self._process_document(self.report)
            self.embedding = Memory(
                cfg.embedding_provider,
                cfg.embedding_model,
                **cfg.embedding_kwargs
            ).get_embeddings()
            self.vector_store = InMemoryVectorStore(self.embedding)
            self.vector_store.add_texts(documents)

        # Create the React Agent Graph with the configured provider
        graph = create_react_agent(
            provider,
            tools=[self.vector_store_tool(self.vector_store)],
            checkpointer=MemorySaver()
        )

        return graph

    def vector_store_tool(self, vector_store) -> Tool:
        """Create Vector Store Tool"""
        @tool
        def retrieve_info(query):
            """
            Consult the report for relevant contexts whenever you don't know something
            """
            # The result count is passed through search_kwargs, which is the
            # documented way to configure a vector-store retriever.
            retriever = vector_store.as_retriever(search_kwargs={"k": 4})
            return retriever.invoke(query)
        return retrieve_info

    def _process_document(self, report):
        """Split Report into Chunks"""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1024,
            chunk_overlap=20,
            length_function=len,
            is_separator_regex=False,
        )
        documents = text_splitter.split_text(report)
        return documents

    async def chat(self, message, websocket):
        """Chat with React Agent

        Wraps the user message in the system-style prompt, runs the agent and,
        when a websocket is given, sends the answer as a ``chat`` JSON message.

        Args:
            message: The user's chat message.
            websocket: Object with an async ``send_json`` method, or ``None``.

        Returns:
            The content of the last message produced by the agent, so callers
            without a websocket can still use the answer.
        """
        message = f"""
        You are GPT Researcher, a autonomous research agent created by an open source community at https://github.com/assafelovic/gpt-researcher, homepage: https://gptr.dev.
        To learn more about GPT Researcher you can suggest to check out: https://docs.gptr.dev.

        This is a chat message between the user and you: GPT Researcher.
        The chat is about a research reports that you created. Answer based on the given context and report.
        You must include citations to your answer based on the report.

        Report: {self.report}
        User Message: {message}
        """
        inputs = {"messages": [("user", message)]}
        response = await self.graph.ainvoke(inputs, config=self.chat_config)
        ai_message = response["messages"][-1].content
        if websocket is not None:
            await websocket.send_json({"type": "chat", "content": ai_message})
        return ai_message

    def get_context(self):
        """return the current context of the chat"""
        return self.report
backend/memory/__init__.py ADDED
File without changes
backend/memory/draft.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import TypedDict, List, Annotated
2
+ import operator
3
+
4
+
5
class DraftState(TypedDict):
    """Typed dictionary describing the state carried for one draft.

    NOTE(review): the per-field notes are inferred from the field names and
    types only; confirm against the code that populates this state.
    """
    task: dict  # presumably the task configuration — TODO confirm
    topic: str  # presumably the topic the draft covers — TODO confirm
    draft: dict  # the draft itself, as a dict; schema not visible here
    review: str  # presumably reviewer feedback on the draft — TODO confirm
    revision_notes: str  # presumably notes for the next revision — TODO confirm
backend/memory/research.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import TypedDict, List, Annotated
2
+ import operator
3
+
4
+
5
class ResearchState(TypedDict):
    """Typed dictionary describing the state of a research run and its report.

    The first group of fields holds research inputs and collected data; the
    fields after the "Report layout" marker hold the parts of the final report.
    NOTE(review): field semantics are inferred from names and types only;
    confirm against the code that reads and writes this state.
    """
    task: dict
    initial_research: str
    sections: List[str]
    research_data: List[dict]
    # Report layout
    title: str
    headers: dict
    date: str
    table_of_contents: str
    introduction: str
    conclusion: str
    sources: List[str]
    report: str
19
+
20
+
backend/report_type/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from .basic_report.basic_report import BasicReport
2
+ from .detailed_report.detailed_report import DetailedReport
3
+
4
+ __all__ = [
5
+ "BasicReport",
6
+ "DetailedReport"
7
+ ]
backend/report_type/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (345 Bytes). View file
 
backend/report_type/basic_report/__init__.py ADDED
File without changes
backend/report_type/basic_report/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (181 Bytes). View file
 
backend/report_type/basic_report/__pycache__/basic_report.cpython-312.pyc ADDED
Binary file (1.84 kB). View file
 
backend/report_type/basic_report/basic_report.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import WebSocket
2
+ from typing import Any
3
+
4
+ from gpt_researcher import GPTResearcher
5
+
6
+
7
class BasicReport:
    """Run a single GPTResearcher pass and return the written report."""

    def __init__(
        self,
        query: str,
        report_type: str,
        report_source: str,
        source_urls,
        document_urls,
        tone: Any,
        config_path: str,
        websocket: WebSocket,
        headers=None
    ):
        """Keep every argument on the instance for later use by ``run``.

        ``headers`` falls back to an empty dict when it is missing or falsy.
        """
        # What to research and which kind of report to produce.
        self.query = query
        self.report_type = report_type
        self.tone = tone

        # Where the research material comes from.
        self.report_source = report_source
        self.source_urls = source_urls
        self.document_urls = document_urls

        # Runtime plumbing.
        self.config_path = config_path
        self.websocket = websocket
        self.headers = headers if headers else {}

    async def run(self):
        """Conduct the research, then write and return the report."""
        researcher_options = dict(
            query=self.query,
            report_type=self.report_type,
            report_source=self.report_source,
            source_urls=self.source_urls,
            document_urls=self.document_urls,
            tone=self.tone,
            config_path=self.config_path,
            websocket=self.websocket,
            headers=self.headers,
        )
        researcher = GPTResearcher(**researcher_options)

        await researcher.conduct_research()
        return await researcher.write_report()
backend/report_type/detailed_report/README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Detailed Reports
2
+
3
+ Introducing long and detailed reports, with a completely new architecture inspired by the latest [STORM](https://arxiv.org/abs/2402.14207) paper.
4
+
5
+ In this method we do the following:
6
+
7
+ 1. Trigger Initial GPT Researcher report based on task
8
+ 2. Generate subtopics from research summary
9
+ 3. For each subtopic the headers of the subtopic report are extracted and accumulated
10
+ 4. For each subtopic a report is generated making sure that any information about the headers accumulated until now are not re-generated.
11
+ 5. An additional introduction section is written along with a table of contents constructed from the entire report.
12
+ 6. The final report is constructed by appending these : Intro + Table of contents + Subsection reports
backend/report_type/detailed_report/__init__.py ADDED
File without changes
backend/report_type/detailed_report/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (184 Bytes). View file
 
backend/report_type/detailed_report/__pycache__/detailed_report.cpython-312.pyc ADDED
Binary file (8.03 kB). View file
 
backend/report_type/detailed_report/detailed_report.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from typing import List, Dict, Set, Optional, Any
3
+ from fastapi import WebSocket
4
+
5
+ from gpt_researcher import GPTResearcher
6
+
7
+
8
class DetailedReport:
    """Build a long report from an initial research pass plus per-subtopic reports.

    The flow implemented by ``run`` is: initial research, subtopic discovery,
    introduction, one report per subtopic, then assembly of introduction,
    table of contents, body and conclusion with references.
    """

    def __init__(
        self,
        query: str,
        report_type: str,
        report_source: str,
        source_urls: Optional[List[str]] = None,
        document_urls: Optional[List[str]] = None,
        config_path: Optional[str] = None,
        tone: Any = "",
        websocket: Optional[WebSocket] = None,
        subtopics: Optional[List[Dict]] = None,
        headers: Optional[Dict] = None
    ):
        """Store the inputs and create the main researcher.

        Args:
            query: The research query.
            report_type: Stored on the instance. The main researcher is always
                created with ``report_type="research_report"``.
            report_source: Passed through to every researcher created here.
            source_urls: Optional source URLs; defaults to a new empty list.
            document_urls: Optional document URLs; defaults to a new empty list.
            config_path: Optional configuration path for the main researcher.
            tone: Passed through to every researcher created here.
            websocket: Optional websocket passed through to the researchers.
            subtopics: Optional subtopics passed to each subtopic researcher;
                defaults to a new empty list.
            headers: Optional headers; defaults to an empty dict.
        """
        self.query = query
        self.report_type = report_type
        self.report_source = report_source
        # The list defaults are None in the signature and replaced here so
        # that instances never share one list object created at definition
        # time. A list supplied by the caller is kept as-is (not copied).
        self.source_urls = source_urls if source_urls is not None else []
        self.document_urls = document_urls if document_urls is not None else []
        self.config_path = config_path
        self.tone = tone
        self.websocket = websocket
        self.subtopics = subtopics if subtopics is not None else []
        self.headers = headers or {}

        self.gpt_researcher = GPTResearcher(
            query=self.query,
            report_type="research_report",
            report_source=self.report_source,
            source_urls=self.source_urls,
            document_urls=self.document_urls,
            config_path=self.config_path,
            tone=self.tone,
            websocket=self.websocket,
            headers=self.headers
        )
        # State accumulated across subtopic reports.
        self.existing_headers: List[Dict] = []
        self.global_context: List[str] = []
        self.global_written_sections: List[str] = []
        self.global_urls: Set[str] = set(
            self.source_urls) if self.source_urls else set()

    async def run(self) -> str:
        """Run the whole pipeline and return the assembled report text."""
        await self._initial_research()
        subtopics = await self._get_all_subtopics()
        report_introduction = await self.gpt_researcher.write_introduction()
        _, report_body = await self._generate_subtopic_reports(subtopics)
        self.gpt_researcher.visited_urls.update(self.global_urls)
        report = await self._construct_detailed_report(report_introduction, report_body)
        return report

    async def _initial_research(self) -> None:
        """Run the main researcher and adopt its context and visited URLs."""
        await self.gpt_researcher.conduct_research()
        self.global_context = self.gpt_researcher.context
        self.global_urls = self.gpt_researcher.visited_urls

    async def _get_all_subtopics(self) -> List[Dict]:
        """Return the discovered subtopics as ``{"task": ...}`` dicts.

        Returns an empty list (after printing a diagnostic) when the
        researcher yields nothing usable.
        """
        subtopics_data = await self.gpt_researcher.get_subtopics()

        all_subtopics = []
        if subtopics_data and subtopics_data.subtopics:
            for subtopic in subtopics_data.subtopics:
                all_subtopics.append({"task": subtopic.task})
        else:
            print(f"Unexpected subtopics data format: {subtopics_data}")

        return all_subtopics

    async def _generate_subtopic_reports(self, subtopics: List[Dict]) -> tuple:
        """Generate a report for each subtopic, one after another.

        Returns:
            A ``(results, body)`` tuple: the per-subtopic result dicts that
            produced a non-empty report, and those reports concatenated, each
            preceded by three newlines.
        """
        subtopic_reports = []

        # Subtopics are processed sequentially because each report depends on
        # the headers and sections accumulated by the previous ones.
        for subtopic in subtopics:
            result = await self._get_subtopic_report(subtopic)
            if result["report"]:
                subtopic_reports.append(result)

        subtopics_report_body = "".join(
            f"\n\n\n{result['report']}" for result in subtopic_reports
        )
        return subtopic_reports, subtopics_report_body

    async def _get_subtopic_report(self, subtopic: Dict) -> Dict[str, Any]:
        """Research and write the report for one subtopic.

        Also updates the accumulated written sections, context, URLs and
        headers on the instance.

        Returns:
            ``{"topic": subtopic, "report": subtopic_report}``.
        """
        current_subtopic_task = subtopic.get("task")
        subtopic_assistant = GPTResearcher(
            query=current_subtopic_task,
            report_type="subtopic_report",
            report_source=self.report_source,
            websocket=self.websocket,
            headers=self.headers,
            parent_query=self.query,
            subtopics=self.subtopics,
            visited_urls=self.global_urls,
            agent=self.gpt_researcher.agent,
            role=self.gpt_researcher.role,
            tone=self.tone,
        )

        # Seed the subtopic researcher with the de-duplicated shared context.
        subtopic_assistant.context = list(set(self.global_context))
        await subtopic_assistant.conduct_research()

        draft_section_titles = await subtopic_assistant.get_draft_section_titles(current_subtopic_task)

        if not isinstance(draft_section_titles, str):
            draft_section_titles = str(draft_section_titles)

        parse_draft_section_titles = self.gpt_researcher.extract_headers(draft_section_titles)
        parse_draft_section_titles_text = [header.get(
            "text", "") for header in parse_draft_section_titles]

        relevant_contents = await subtopic_assistant.get_similar_written_contents_by_draft_section_titles(
            current_subtopic_task, parse_draft_section_titles_text, self.global_written_sections
        )

        subtopic_report = await subtopic_assistant.write_report(self.existing_headers, relevant_contents)

        self.global_written_sections.extend(self.gpt_researcher.extract_sections(subtopic_report))
        self.global_context = list(set(subtopic_assistant.context))
        self.global_urls.update(subtopic_assistant.visited_urls)

        self.existing_headers.append({
            "subtopic task": current_subtopic_task,
            "headers": self.gpt_researcher.extract_headers(subtopic_report),
        })

        return {"topic": subtopic, "report": subtopic_report}

    async def _construct_detailed_report(self, introduction: str, report_body: str) -> str:
        """Join introduction, table of contents, body and referenced conclusion."""
        toc = self.gpt_researcher.table_of_contents(report_body)
        conclusion = await self.gpt_researcher.write_report_conclusion(report_body)
        conclusion_with_references = self.gpt_researcher.add_references(
            conclusion, self.gpt_researcher.visited_urls)
        report = f"{introduction}\n\n{toc}\n\n{report_body}\n\n{conclusion_with_references}"
        return report
backend/server/__init__.py ADDED
File without changes
backend/server/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (163 Bytes). View file
 
backend/server/__pycache__/server.cpython-312.pyc ADDED
Binary file (5.85 kB). View file
 
backend/server/__pycache__/server_utils.cpython-312.pyc ADDED
Binary file (14.8 kB). View file
 
backend/server/__pycache__/websocket_manager.cpython-312.pyc ADDED
Binary file (5.55 kB). View file
 
backend/server/app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import logging
4
+
5
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

app = FastAPI()

# CORS policy applied to every route of this app.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive; replace "*" with the frontend domain before deploying.
_CORS_OPTIONS = {
    "allow_origins": ["*"],  # In production, replace with your frontend domain
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
backend/server/logging_config.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import json
3
+ import os
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+
7
class JSONResearchHandler:
    """Keep research events and content in memory and mirror them to a JSON file.

    The whole document is rewritten to ``json_file`` after every change, so
    the file always reflects the latest in-memory state.
    """

    def __init__(self, json_file):
        """Remember the target path and start with an empty research record.

        Nothing is written to disk until the first event or content update.
        """
        self.json_file = json_file
        blank_content = {
            "query": "",
            "sources": [],
            "context": [],
            "report": "",
            "costs": 0.0,
        }
        self.research_data = {
            "timestamp": datetime.now().isoformat(),
            "events": [],
            "content": blank_content,
        }

    def log_event(self, event_type: str, data: dict):
        """Append a timestamped event and persist the record."""
        entry = {
            "timestamp": datetime.now().isoformat(),
            "type": event_type,
            "data": data,
        }
        self.research_data["events"].append(entry)
        self._save_json()

    def update_content(self, key: str, value):
        """Set one entry of the ``content`` section and persist the record."""
        content = self.research_data["content"]
        content[key] = value
        self._save_json()

    def _save_json(self):
        """Overwrite the JSON file with the current record."""
        with open(self.json_file, 'w') as out:
            json.dump(self.research_data, out, indent=2)
37
+
38
def setup_research_logging():
    """Configure the 'research' logger and create a JSON handler for one run.

    A ``logs`` directory is created if needed. The logger writes to a
    timestamped ``.log`` file and to the console, and does not propagate to
    the root logger. A ``JSONResearchHandler`` targeting a matching ``.json``
    path is created and attached to the logger as ``json_handler`` so that
    ``get_json_handler()`` can find it.

    Returns:
        A 4-tuple ``(log_file, json_file, research_logger, json_handler)``
        where the two paths are strings.
    """
    # Create logs directory if it doesn't exist
    logs_dir = Path("logs")
    logs_dir.mkdir(exist_ok=True)

    # Both files of a run share one timestamp.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = logs_dir / f"research_{timestamp}.log"
    json_file = logs_dir / f"research_{timestamp}.json"

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    research_logger = logging.getLogger('research')
    research_logger.setLevel(logging.INFO)

    # Remove handlers left by an earlier call to avoid duplicate output, and
    # close them so the previous log file is not left open.
    for stale_handler in list(research_logger.handlers):
        research_logger.removeHandler(stale_handler)
        stale_handler.close()

    # File handler for research logs
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    research_logger.addHandler(file_handler)

    # Stream handler for console output
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    research_logger.addHandler(console_handler)

    # Prevent propagation to root logger to avoid duplicate logs
    research_logger.propagate = False

    # Create the JSON handler and expose it on the logger; get_json_handler()
    # looks it up through this attribute.
    json_handler = JSONResearchHandler(json_file)
    research_logger.json_handler = json_handler

    return str(log_file), str(json_file), research_logger, json_handler
77
+
78
+ # Create a function to get the logger and JSON handler
79
def get_research_logger():
    """Return the logger registered under the name 'research'."""
    research_logger = logging.getLogger('research')
    return research_logger
81
+
82
def get_json_handler():
    """Return the ``json_handler`` attribute of the 'research' logger, or None if unset."""
    research_logger = logging.getLogger('research')
    try:
        return research_logger.json_handler
    except AttributeError:
        return None
backend/server/server.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from typing import Dict, List
4
+
5
+ from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect, File, UploadFile, Header
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.staticfiles import StaticFiles
8
+ from fastapi.templating import Jinja2Templates
9
+ from pydantic import BaseModel
10
+
11
+ from backend.server.websocket_manager import WebSocketManager
12
+ from backend.server.server_utils import (
13
+ get_config_dict,
14
+ update_environment_variables, handle_file_upload, handle_file_deletion,
15
+ execute_multi_agents, handle_websocket_communication
16
+ )
17
+
18
+
19
+ from gpt_researcher.utils.logging_config import setup_research_logging
20
+
21
+ import logging
22
+
23
+ # Get logger instance
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Don't override parent logger settings
27
+ logger.propagate = True
28
+
29
+ logging.basicConfig(
30
+ level=logging.INFO,
31
+ format="%(asctime)s - %(levelname)s - %(message)s",
32
+ handlers=[
33
+ logging.StreamHandler() # Only log to console
34
+ ]
35
+ )
36
+
37
+ # Models
38
+
39
+
40
class ResearchRequest(BaseModel):
    """Request body describing one research job; all three fields are required strings."""

    task: str
    report_type: str
    agent: str
44
+
45
+
46
class ConfigRequest(BaseModel):
    """Configuration payload of API keys and settings, all as strings.

    Fields declared without a default are required; the search-provider
    fields default to an empty string.
    """

    # Required settings
    ANTHROPIC_API_KEY: str
    TAVILY_API_KEY: str
    LANGCHAIN_TRACING_V2: str
    LANGCHAIN_API_KEY: str
    OPENAI_API_KEY: str
    DOC_PATH: str
    RETRIEVER: str
    # Optional search-provider settings
    GOOGLE_API_KEY: str = ""
    GOOGLE_CX_KEY: str = ""
    BING_API_KEY: str = ""
    SEARCHAPI_API_KEY: str = ""
    SERPAPI_API_KEY: str = ""
    SERPER_API_KEY: str = ""
    SEARX_URL: str = ""
    # Required settings
    XAI_API_KEY: str
    DEEPSEEK_API_KEY: str
63
+
64
+
65
# Application object shared by all routes below.
app = FastAPI()

# The frontend directory is served twice: as a whole under /site and its
# static assets under /static. It is also the template directory.
_FRONTEND_DIR = "./frontend"
app.mount("/site", StaticFiles(directory=_FRONTEND_DIR), name="site")
app.mount("/static", StaticFiles(directory=f"{_FRONTEND_DIR}/static"), name="static")
templates = Jinja2Templates(directory=_FRONTEND_DIR)

# Tracks WebSocket connections for the /ws endpoint.
manager = WebSocketManager()

# CORS: only the local frontend origin is allowed.
_CORS_OPTIONS = {
    "allow_origins": ["http://localhost:3000"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Directory holding user documents; overridable through the environment.
DOC_PATH = os.getenv("DOC_PATH", "./my-docs")
87
+
88
+ # Startup event
89
+
90
+
91
@app.on_event("startup")
def startup_event():
    """Create the outputs and document directories and serve outputs under /outputs."""
    outputs_dir = "outputs"
    # The directory must exist before it is mounted.
    os.makedirs(outputs_dir, exist_ok=True)
    app.mount("/outputs", StaticFiles(directory=outputs_dir), name="outputs")
    os.makedirs(DOC_PATH, exist_ok=True)
96
+
97
+
98
+ # Routes
99
+
100
+
101
@app.get("/")
async def read_root(request: Request):
    """Render ``index.html`` with the request and no report in its context."""
    context = {"request": request, "report": None}
    return templates.TemplateResponse("index.html", context)
104
+
105
+
106
@app.get("/files/")
async def list_files():
    """Return the directory entries of ``DOC_PATH`` under the ``files`` key."""
    files = os.listdir(DOC_PATH)
    print(f"Files in {DOC_PATH}: {files}")
    return dict(files=files)
111
+
112
+
113
@app.post("/api/multi_agents")
async def run_multi_agents():
    """Delegate to ``execute_multi_agents`` with the shared WebSocket manager."""
    outcome = await execute_multi_agents(manager)
    return outcome
116
+
117
+
118
@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    """Pass the uploaded file to ``handle_file_upload`` for storage in ``DOC_PATH``."""
    upload_result = await handle_file_upload(file, DOC_PATH)
    return upload_result
121
+
122
+
123
@app.delete("/files/{filename}")
async def delete_file(filename: str):
    """Pass ``filename`` to ``handle_file_deletion`` for removal from ``DOC_PATH``."""
    deletion_response = await handle_file_deletion(filename, DOC_PATH)
    return deletion_response
126
+
127
+
128
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Register the socket with the manager and process its messages until it disconnects."""
    await manager.connect(websocket)
    try:
        await handle_websocket_communication(websocket, manager)
    except WebSocketDisconnect:
        # Only a disconnect is handled here; it unregisters the socket.
        await manager.disconnect(websocket)
backend/server/server_utils.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+ import time
5
+ import shutil
6
+ from typing import Dict, List, Any
7
+ from fastapi.responses import JSONResponse, FileResponse
8
+ from gpt_researcher.document.document import DocumentLoader
9
+ from backend.utils import write_md_to_pdf, write_md_to_word, write_text_to_md
10
+ from pathlib import Path
11
+ from datetime import datetime
12
+ from fastapi import HTTPException
13
+ import logging
14
+
15
# Module logger used by the helpers below.
logger = logging.getLogger(__name__)
# NOTE(review): this configures root logging at DEBUG as an import side
# effect; confirm it does not pre-empt the application's own basicConfig call.
logging.basicConfig(level=logging.DEBUG)
17
+
18
class CustomLogsHandler:
    """Custom handler to capture streaming logs from the research process.

    Each message is forwarded to the websocket (when one is set) and merged
    into a per-task JSON file under ``outputs``.
    """

    def __init__(self, websocket, task: str):
        """Create the per-task log file and seed it with an empty record.

        Args:
            websocket: Object with an async ``send_json`` method, or a falsy
                value to skip forwarding.
            task: Task text used to derive the log file name.
        """
        self.logs = []
        self.websocket = websocket
        sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{task}")
        self.log_file = os.path.join("outputs", f"{sanitized_filename}.json")
        self.timestamp = datetime.now().isoformat()
        # Initialize log file with metadata
        os.makedirs("outputs", exist_ok=True)
        initial_record = {
            "timestamp": self.timestamp,
            "events": [],
            "content": {
                "query": "",
                "sources": [],
                "context": [],
                "report": "",
                "costs": 0.0,
            },
        }
        with open(self.log_file, 'w') as out:
            json.dump(initial_record, out, indent=2)

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Store log data and send to websocket"""
        # Send to websocket for real-time display
        if self.websocket:
            await self.websocket.send_json(data)

        # Load the record written so far.
        with open(self.log_file, 'r') as src:
            log_data = json.load(src)

        if data.get('type') != 'logs':
            # Any other payload is merged key-by-key into the content section.
            log_data['content'].update(data)
        else:
            # Messages of type 'logs' are recorded as timestamped events.
            log_data['events'].append({
                "timestamp": datetime.now().isoformat(),
                "type": "event",
                "data": data,
            })

        # Write the whole record back.
        with open(self.log_file, 'w') as out:
            json.dump(log_data, out, indent=2)
        logger.debug(f"Log entry written to: {self.log_file}")
66
+
67
+
68
class Researcher:
    """Run one research task without a client websocket and write its report files."""

    def __init__(self, query: str, report_type: str = "research_report"):
        """Set up the logs handler and the underlying GPTResearcher.

        Args:
            query: Research question.
            report_type: Report type passed through to GPTResearcher.
        """
        # Imported locally because the module's import block does not bring
        # GPTResearcher into scope.
        from gpt_researcher import GPTResearcher

        self.query = query
        self.report_type = report_type
        # Generate unique ID for this research task
        self.research_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash(query)}"
        # CustomLogsHandler takes (websocket, task). There is no client socket
        # here, so pass None (send_json then only writes the log file) and use
        # the research ID as the task label.
        self.logs_handler = CustomLogsHandler(None, self.research_id)
        self.researcher = GPTResearcher(
            query=query,
            report_type=report_type,
            websocket=self.logs_handler
        )

    async def research(self) -> dict:
        """Conduct research and return paths to generated files.

        Returns:
            ``{"output": {...}}`` holding the paths returned by
            ``generate_report_files`` plus ``"json"``, the relative path of
            the handler's log file.
        """
        await self.researcher.conduct_research()
        report = await self.researcher.write_report()

        # Generate the files
        sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{self.query}")
        file_paths = await generate_report_files(report, sanitized_filename)

        # Get the JSON log path that was created by CustomLogsHandler
        json_relative_path = os.path.relpath(self.logs_handler.log_file)

        return {
            "output": {
                **file_paths,  # Include PDF, DOCX, and MD paths
                "json": json_relative_path
            }
        }
100
+
101
def sanitize_filename(filename: str) -> str:
    """Shorten and clean a ``<prefix>_<timestamp>_<task>`` name for use as a file name.

    The task part is cut to a fixed budget measured in UTF-8 bytes, because
    file name limits are in bytes and a character-based cut would let long
    non-ASCII tasks through. Afterwards every character that is not a word
    character, whitespace or hyphen is removed and surrounding whitespace is
    stripped.

    Args:
        filename: Expected as ``<prefix>_<timestamp>_<task>``. A name with no
            underscore is treated as a bare task instead of raising.

    Returns:
        The cleaned name, without any extension.
    """
    # 255 - len("outputs/") - len("task_") - len(timestamp) - len("_.json") - safety_margin
    max_task_length = 255 - 8 - 5 - 10 - 6 - 10  # 216 bytes for task

    # Only the first two underscores separate components; the task itself
    # may contain more.
    parts = filename.split('_', 2)
    if len(parts) < 2:
        head, task = "", filename
    else:
        head = f"{parts[0]}_{parts[1]}_"
        task = parts[2] if len(parts) == 3 else ""

    # Cut on a byte budget; errors="ignore" drops a character split by the cut.
    truncated_task = (
        task.encode("utf-8", errors="ignore")[:max_task_length]
        .decode("utf-8", errors="ignore")
    )

    # Reassemble and clean the filename
    return re.sub(r"[^\w\s-]", "", f"{head}{truncated_task}").strip()
116
+
117
+
118
async def handle_start_command(websocket, data: str, manager):
    """Run a research job described by a ``start`` message and send back the file paths.

    The JSON payload is everything after the first six characters of
    ``data``. If it has no task or no report type, an error is printed and
    nothing else happens.
    """
    command = json.loads(data[6:])
    (task, report_type, source_urls, document_urls,
     tone, headers, report_source) = extract_command_data(command)

    if not (task and report_type):
        print("Error: Missing task or report_type")
        return

    # Create logs handler with websocket and task
    logs_handler = CustomLogsHandler(websocket, task)
    # Initialize log content with query
    initial_content = {
        "query": task,
        "sources": [],
        "context": [],
        "report": "",
    }
    await logs_handler.send_json(initial_content)

    sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{task}")

    streamed_report = await manager.start_streaming(
        task,
        report_type,
        report_source,
        source_urls,
        document_urls,
        tone,
        websocket,
        headers,
    )
    report = str(streamed_report)
    file_paths = await generate_report_files(report, sanitized_filename)
    # Add JSON log path to file_paths
    file_paths["json"] = os.path.relpath(logs_handler.log_file)
    await send_file_paths(websocket, file_paths)
154
+
155
+
156
async def handle_human_feedback(data: str):
    """Parse and print the JSON payload of a ``human_feedback`` message."""
    payload = data[14:]  # Remove "human_feedback" prefix
    feedback_data = json.loads(payload)
    print(f"Received human feedback: {feedback_data}")
    # TODO: Add logic to forward the feedback to the appropriate agent or update the research state
160
+
161
async def handle_chat(websocket, data: str, manager):
    """Forward the ``message`` field of a ``chat`` payload to ``manager.chat``.

    The JSON payload is everything after the first four characters of ``data``.
    """
    json_data = json.loads(data[4:])
    message = json_data.get("message")
    print(f"Received chat message: {message}")
    await manager.chat(message, websocket)
165
+
166
async def generate_report_files(report: str, filename: str) -> Dict[str, str]:
    """Write the report as PDF, Word and Markdown, one after another.

    Returns:
        The three resulting paths under the keys ``pdf``, ``docx`` and ``md``.
    """
    paths = {}
    paths["pdf"] = await write_md_to_pdf(report, filename)
    paths["docx"] = await write_md_to_word(report, filename)
    paths["md"] = await write_text_to_md(report, filename)
    return paths
171
+
172
+
173
async def send_file_paths(websocket, file_paths: Dict[str, str]):
    """Send ``file_paths`` to the client in a message of type ``path``."""
    message = {"type": "path", "output": file_paths}
    await websocket.send_json(message)
175
+
176
+
177
def get_config_dict(
    langchain_api_key: str, openai_api_key: str, tavily_api_key: str,
    google_api_key: str, google_cx_key: str, bing_api_key: str,
    searchapi_api_key: str, serpapi_api_key: str, serper_api_key: str, searx_url: str
) -> Dict[str, str]:
    """Build the configuration mapping from explicit values and the environment.

    For the ten keys that have a parameter, a truthy argument wins; otherwise
    the environment variable of the same name is used, or ``""``. The
    remaining four entries are read from the environment only.
    """
    explicit_values = {
        "LANGCHAIN_API_KEY": langchain_api_key,
        "OPENAI_API_KEY": openai_api_key,
        "TAVILY_API_KEY": tavily_api_key,
        "GOOGLE_API_KEY": google_api_key,
        "GOOGLE_CX_KEY": google_cx_key,
        "BING_API_KEY": bing_api_key,
        "SEARCHAPI_API_KEY": searchapi_api_key,
        "SERPAPI_API_KEY": serpapi_api_key,
        "SERPER_API_KEY": serper_api_key,
        "SEARX_URL": searx_url,
    }
    config = {
        name: value or os.getenv(name, "")
        for name, value in explicit_values.items()
    }
    # Environment-only settings, each with its own fallback.
    config["LANGCHAIN_TRACING_V2"] = os.getenv("LANGCHAIN_TRACING_V2", "true")
    config["DOC_PATH"] = os.getenv("DOC_PATH", "./my-docs")
    config["RETRIEVER"] = os.getenv("RETRIEVER", "")
    config["EMBEDDING_MODEL"] = os.getenv("OPENAI_EMBEDDING_MODEL", "")
    return config
198
+
199
+
200
def update_environment_variables(config: Dict[str, str]):
    """Copy every key/value pair of ``config`` into ``os.environ``."""
    os.environ.update(config)
203
+
204
+
205
async def handle_file_upload(file, DOC_PATH: str) -> Dict[str, str]:
    """Save an uploaded file into ``DOC_PATH`` and reload the documents there.

    Only the base name of the uploaded file name is used for the target
    path, so directory components in the name are discarded.

    Returns:
        The uploaded file name as received and the path it was written to.
    """
    safe_name = os.path.basename(file.filename)
    file_path = os.path.join(DOC_PATH, safe_name)
    with open(file_path, "wb") as destination:
        shutil.copyfileobj(file.file, destination)
    print(f"File uploaded to {file_path}")

    # Run the document loader over the whole directory after the upload.
    await DocumentLoader(DOC_PATH).load()

    return {"filename": file.filename, "path": file_path}
215
+
216
+
217
async def handle_file_deletion(filename: str, DOC_PATH: str) -> JSONResponse:
    """Delete ``filename`` from ``DOC_PATH``.

    Only the base name of ``filename`` is used, so directory components are
    discarded.

    Returns:
        A success message, or a 404 response when the file does not exist.
    """
    file_path = os.path.join(DOC_PATH, os.path.basename(filename))
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return JSONResponse(status_code=404, content={"message": "File not found"})

    os.remove(file_path)
    print(f"File deleted: {file_path}")
    return JSONResponse(content={"message": "File deleted successfully"})
226
+
227
+
228
async def execute_multi_agents(manager) -> Any:
    """Run a fixed research task over the manager's first active WebSocket connection.

    Returns:
        ``{"report": ...}`` when a connection exists, otherwise a 400
        response stating that there is no active WebSocket connection.
    """
    connections = manager.active_connections
    websocket = connections[0] if connections else None
    if not websocket:
        return JSONResponse(status_code=400, content={"message": "No active WebSocket connection"})

    # NOTE(review): run_research_task and stream_output are not imported in
    # this module's import block; confirm where they come from, otherwise
    # this line raises NameError.
    report = await run_research_task("Is AI in a hype cycle?", websocket, stream_output)
    return {"report": report}
235
+
236
+
237
async def handle_websocket_communication(websocket, manager):
    """Receive text messages forever and dispatch each one by its prefix.

    Prefixes are checked in this order: ``start``, ``human_feedback``,
    ``chat``. Anything else is reported with a printed error. The loop has
    no exit of its own; it ends when receiving or a handler raises.
    """
    while True:
        message = await websocket.receive_text()

        if message.startswith("start"):
            await handle_start_command(websocket, message, manager)
            continue

        if message.startswith("human_feedback"):
            await handle_human_feedback(message)
            continue

        if message.startswith("chat"):
            await handle_chat(websocket, message, manager)
            continue

        print("Error: Unknown command or not enough parameters provided.")
248
+
249
+
250
def extract_command_data(json_data: Dict) -> tuple:
    """Pull the start-command fields out of ``json_data`` as a 7-tuple.

    The order is: task, report_type, source_urls, document_urls, tone,
    headers, report_source. Missing keys yield ``None``, except ``headers``
    which falls back to an empty dict.
    """
    lookup = json_data.get
    task = lookup("task")
    report_type = lookup("report_type")
    source_urls = lookup("source_urls")
    document_urls = lookup("document_urls")
    tone = lookup("tone")
    headers = lookup("headers", {})
    report_source = lookup("report_source")
    return (task, report_type, source_urls, document_urls, tone, headers, report_source)