Spaces:
Running
Running
Upload 528 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .cursorrules +1 -0
- .dockerignore +2 -0
- .env +8 -0
- .gitattributes +21 -0
- .github/ISSUE_TEMPLATE/bug_report.md +38 -0
- .github/ISSUE_TEMPLATE/feature_request.md +20 -0
- .github/dependabot.yml +15 -0
- .github/workflows/docker-build.yml +45 -0
- .gitignore +53 -0
- CODE_OF_CONDUCT.md +123 -0
- CONTRIBUTING.md +42 -0
- CURSOR_RULES.md +181 -0
- Dockerfile +46 -0
- LICENSE +201 -0
- Procfile +1 -0
- README-ja_JP.md +159 -0
- README-ko_KR.md +242 -0
- README-zh_CN.md +158 -0
- README.md +231 -11
- __pycache__/main.cpython-312.pyc +0 -0
- backend/__init__.py +1 -0
- backend/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/__pycache__/utils.cpython-312.pyc +0 -0
- backend/chat/__init__.py +1 -0
- backend/chat/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/chat/__pycache__/chat.cpython-312.pyc +0 -0
- backend/chat/chat.py +106 -0
- backend/memory/__init__.py +0 -0
- backend/memory/draft.py +10 -0
- backend/memory/research.py +20 -0
- backend/report_type/__init__.py +7 -0
- backend/report_type/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/report_type/basic_report/__init__.py +0 -0
- backend/report_type/basic_report/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/report_type/basic_report/__pycache__/basic_report.cpython-312.pyc +0 -0
- backend/report_type/basic_report/basic_report.py +46 -0
- backend/report_type/detailed_report/README.md +12 -0
- backend/report_type/detailed_report/__init__.py +0 -0
- backend/report_type/detailed_report/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/report_type/detailed_report/__pycache__/detailed_report.cpython-312.pyc +0 -0
- backend/report_type/detailed_report/detailed_report.py +139 -0
- backend/server/__init__.py +0 -0
- backend/server/__pycache__/__init__.cpython-312.pyc +0 -0
- backend/server/__pycache__/server.cpython-312.pyc +0 -0
- backend/server/__pycache__/server_utils.cpython-312.pyc +0 -0
- backend/server/__pycache__/websocket_manager.cpython-312.pyc +0 -0
- backend/server/app.py +16 -0
- backend/server/logging_config.py +83 -0
- backend/server/server.py +134 -0
- backend/server/server_utils.py +259 -0
.cursorrules
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Project Overview This project, named GPT-Researcher, is an LLM-based autonomous agent that conducts local and web research on any topic and generates a comprehensive report with citations. It is built using Next.js and TypeScript and integrates various libraries for their strengths. Your primary goal is to help with Next.js app router patterns, TypeScript type safety, Tailwind CSS best practices, code quality standards, and Python/FastAPI backend optimizations. # Key URLs - Project Home Page: https://gptr.dev/ - GitHub Repository: https://github.com/assafelovic/gpt-researcher - Documentation: https://docs.gptr.dev/ # Project Structure - Frontend user interface built with Next.js, TypeScript, and Tailwind CSS in `/frontend` - Static FastAPI version for lightweight deployments - Next.js version for production use with enhanced features - Multi-agent research system using LangChain and LangGraph in `/backend/multi_agents` - Browser, Editor, Researcher, Reviewer, Revisor, Writer, and Publisher agents - Task configuration and agent coordination - Document processing using Unstructured and PyMuPDF in `/backend/document_processing` - PDF, DOCX, and web content parsing - Text extraction and preprocessing - Report generation using LangChain and Jinja2 templates in `/backend/report_generation` - Template-based report structuring - Dynamic content formatting - Multiple output formats in `/backend/output_formats` - PDF via md2pdf - Markdown via mistune - DOCX via python-docx - Format conversion utilities - Export functionality - GPT Researcher core functionality in `/gpt_researcher` - Web scraping and content aggregation - Research planning and execution - Source validation and tracking - Query processing and response generation - Testing infrastructure in `/tests` - Unit tests for individual components - Integration tests for agent interactions - End-to-end research workflow tests - Mock data and fixtures for testing # Language Model Configuration - Default model: gpt-4-turbo - 
Alternative models: gpt-3.5-turbo, claude-3-opus - Temperature settings for different tasks - Context window management - Token limit handling - Cost optimization strategies # Error Handling - Research failure recovery - API rate limiting - Network timeout handling - Invalid input management - Source validation errors - Report generation failures # Performance - Parallel processing strategies - Caching mechanisms - Memory management - Response streaming - Resource allocation - Query optimization # Development Workflow - Branch naming conventions - Commit message format - PR review process - Testing requirements - Documentation updates - Version control guidelines # API Documentation - REST endpoints - WebSocket events - Request/Response formats - Authentication methods - Rate limits - Error codes # Monitoring - Performance metrics - Error tracking - Usage statistics - Cost monitoring - Research quality metrics - User feedback tracking # Frontend Components - Static FastAPI version for lightweight deployments - Next.js version for production use with enhanced features # Backend Components - Multi-agent system architecture - Document processing pipeline - Report generation system - Output format handlers # Core Research Components - Web scraping and aggregation - Research planning and execution - Source validation - Query processing # Testing - Unit tests - Integration tests - End-to-end tests - Performance testing # Rule Violation Monitoring - Alert developer when changes conflict with project structure - Warn about deviations from coding standards - Flag unauthorized framework or library additions - Monitor for security and performance anti-patterns - Track API usage patterns that may violate guidelines - Report TypeScript strict mode violations - Identify accessibility compliance issues # Development Guidelines - Use TypeScript with strict mode enabled - Follow ESLint and Prettier configurations - Ensure components are responsive and accessible - Use Tailwind CSS 
for styling, following the project's design system - Minimize AI-generated comments, prefer self-documenting code - Follow React best practices and hooks guidelines - Validate all user inputs and API responses - Use existing components as reference implementations # Important Scripts - `npm run dev`: Start development server - `npm run build`: Build for production - `npm run test`: Run test suite - `python -m pytest`: Run Python tests - `docker-compose up`: Start all services - `docker-compose run gpt-researcher-tests`: Run test suite in container - `python -m uvicorn backend.server.server:app --host=0.0.0.0 --port=8000`: Start FastAPI server - `python -m uvicorn backend.server.server:app --reload`: Start FastAPI server with auto-reload for development - `python main.py`: Run the main application directly # AI Integration Guidelines - Prioritize type safety in all AI interactions - Follow LangChain and LangGraph best practices - Implement proper error handling for AI responses - Maintain context window limits - Handle rate limiting and API quotas - Validate AI outputs before processing - Log AI interactions for debugging # Lexicon - **GPT Researcher**: Autonomous research agent system - **Multi-Agent System**: Coordinated AI agents for research tasks - **Research Pipeline**: End-to-end research workflow - **Agent Roles**: Browser, Editor, Researcher, Reviewer, Revisor, Writer, Publisher - **Source Validation**: Verification of research sources - **Report Generation**: Process of creating final research output # Additional Resources - [Next.js Documentation](https://nextjs.org/docs) - [TypeScript Handbook](https://www.typescriptlang.org/docs/) - [Tailwind CSS Documentation](https://tailwindcss.com/docs) - [LangChain Documentation](https://python.langchain.com/docs/) - [FastAPI Documentation](https://fastapi.tiangolo.com/) - [Project Documentation](https://docs.gptr.dev/) End all your comments with a :-) symbol.
|
.dockerignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.git
|
2 |
+
output/
|
.env
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
GOOGLE_API_KEY=your-google-api-key-here
|
2 |
+
FAST_LLM="google_genai:gemini-1.5-flash"
|
3 |
+
SMART_LLM="google_genai:gemini-1.5-pro"
|
4 |
+
STRATEGIC_LLM="google_genai:gemini-1.5-pro"
|
5 |
+
|
6 |
+
EMBEDDING="google_genai:models/text-embedding-004"
|
7 |
+
|
8 |
+
TAVILY_API_KEY=your-tavily-api-key-here
|
.gitattributes
CHANGED
@@ -33,3 +33,24 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
docs/blog/2023-09-22-gpt-researcher/architecture.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
docs/blog/2023-09-22-gpt-researcher/planner.jpeg filter=lfs diff=lfs merge=lfs -text
|
38 |
+
docs/blog/2024-05-19-gptr-langgraph/blog-langgraph.jpeg filter=lfs diff=lfs merge=lfs -text
|
39 |
+
docs/blog/2024-09-7-hybrid-research/gptr-hybrid.png filter=lfs diff=lfs merge=lfs -text
|
40 |
+
docs/docs/gpt-researcher/context/gptr-hybrid.png filter=lfs diff=lfs merge=lfs -text
|
41 |
+
docs/static/img/architecture.png filter=lfs diff=lfs merge=lfs -text
|
42 |
+
docs/static/img/leaderboard.png filter=lfs diff=lfs merge=lfs -text
|
43 |
+
frontend/nextjs/public/img/agents/academicResearchAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
44 |
+
frontend/nextjs/public/img/agents/businessAnalystAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
45 |
+
frontend/nextjs/public/img/agents/computerSecurityanalystAvatar.png filter=lfs diff=lfs merge=lfs -text
|
46 |
+
frontend/nextjs/public/img/agents/financeAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
47 |
+
frontend/nextjs/public/img/agents/mathAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
48 |
+
frontend/nextjs/public/img/agents/travelAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
49 |
+
frontend/nextjs/public/img/gptr-logo.png filter=lfs diff=lfs merge=lfs -text
|
50 |
+
frontend/static/academicResearchAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
51 |
+
frontend/static/businessAnalystAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
52 |
+
frontend/static/computerSecurityanalystAvatar.png filter=lfs diff=lfs merge=lfs -text
|
53 |
+
frontend/static/financeAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
54 |
+
frontend/static/mathAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
55 |
+
frontend/static/travelAgentAvatar.png filter=lfs diff=lfs merge=lfs -text
|
56 |
+
tests/docs/doc.pdf filter=lfs diff=lfs merge=lfs -text
|
.github/ISSUE_TEMPLATE/bug_report.md
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
name: Bug report
|
3 |
+
about: Create a report to help us improve
|
4 |
+
title: ''
|
5 |
+
labels: ''
|
6 |
+
assignees: ''
|
7 |
+
|
8 |
+
---
|
9 |
+
|
10 |
+
**Describe the bug**
|
11 |
+
A clear and concise description of what the bug is.
|
12 |
+
|
13 |
+
**To Reproduce**
|
14 |
+
Steps to reproduce the behavior:
|
15 |
+
1. Go to '...'
|
16 |
+
2. Click on '....'
|
17 |
+
3. Scroll down to '....'
|
18 |
+
4. See error
|
19 |
+
|
20 |
+
**Expected behavior**
|
21 |
+
A clear and concise description of what you expected to happen.
|
22 |
+
|
23 |
+
**Screenshots**
|
24 |
+
If applicable, add screenshots to help explain your problem.
|
25 |
+
|
26 |
+
**Desktop (please complete the following information):**
|
27 |
+
- OS: [e.g. iOS]
|
28 |
+
- Browser [e.g. chrome, safari]
|
29 |
+
- Version [e.g. 22]
|
30 |
+
|
31 |
+
**Smartphone (please complete the following information):**
|
32 |
+
- Device: [e.g. iPhone6]
|
33 |
+
- OS: [e.g. iOS8.1]
|
34 |
+
- Browser [e.g. stock browser, safari]
|
35 |
+
- Version [e.g. 22]
|
36 |
+
|
37 |
+
**Additional context**
|
38 |
+
Add any other context about the problem here.
|
.github/ISSUE_TEMPLATE/feature_request.md
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
name: Feature request
|
3 |
+
about: Suggest an idea for this project
|
4 |
+
title: ''
|
5 |
+
labels: ''
|
6 |
+
assignees: ''
|
7 |
+
|
8 |
+
---
|
9 |
+
|
10 |
+
**Is your feature request related to a problem? Please describe.**
|
11 |
+
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
|
12 |
+
|
13 |
+
**Describe the solution you'd like**
|
14 |
+
A clear and concise description of what you want to happen.
|
15 |
+
|
16 |
+
**Describe alternatives you've considered**
|
17 |
+
A clear and concise description of any alternative solutions or features you've considered.
|
18 |
+
|
19 |
+
**Additional context**
|
20 |
+
Add any other context or screenshots about the feature request here.
|
.github/dependabot.yml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# To get started with Dependabot version updates, you'll need to specify which
|
2 |
+
# package ecosystems to update and where the package manifests are located.
|
3 |
+
# Please see the documentation for all configuration options:
|
4 |
+
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
|
5 |
+
|
6 |
+
version: 2
|
7 |
+
updates:
|
8 |
+
- package-ecosystem: "pip" # See documentation for possible values
|
9 |
+
directory: "/" # Location of package manifests
|
10 |
+
schedule:
|
11 |
+
interval: "weekly"
|
12 |
+
- package-ecosystem: "docker"
|
13 |
+
directory: "/"
|
14 |
+
schedule:
|
15 |
+
interval: "weekly"
|
.github/workflows/docker-build.yml
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: GPTR tests
|
2 |
+
run-name: ${{ github.actor }} ran the GPTR tests flow
|
3 |
+
permissions:
|
4 |
+
contents: read
|
5 |
+
pull-requests: write
|
6 |
+
on:
|
7 |
+
workflow_dispatch: # Allows this workflow to be triggered manually
|
8 |
+
# pull_request:
|
9 |
+
# types: [opened, synchronize]
|
10 |
+
|
11 |
+
jobs:
|
12 |
+
docker:
|
13 |
+
runs-on: ubuntu-latest
|
14 |
+
environment: tests # Specify the environment to use for this job
|
15 |
+
env:
|
16 |
+
# Ensure these environment variables are set for the entire job
|
17 |
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
18 |
+
TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }}
|
19 |
+
LANGCHAIN_API_KEY: ${{ secrets.LANGCHAIN_API_KEY }}
|
20 |
+
steps:
|
21 |
+
- name: Git checkout
|
22 |
+
uses: actions/checkout@v3
|
23 |
+
|
24 |
+
- name: Set up QEMU
|
25 |
+
uses: docker/setup-qemu-action@v2
|
26 |
+
|
27 |
+
- name: Set up Docker Buildx
|
28 |
+
uses: docker/setup-buildx-action@v2
|
29 |
+
with:
|
30 |
+
driver: docker
|
31 |
+
|
32 |
+
# - name: Build Docker images
|
33 |
+
# uses: docker/build-push-action@v4
|
34 |
+
# with:
|
35 |
+
# push: false
|
36 |
+
# tags: gptresearcher/gpt-researcher:latest
|
37 |
+
# file: Dockerfile
|
38 |
+
|
39 |
+
- name: Set up Docker Compose
|
40 |
+
run: |
|
41 |
+
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
|
42 |
+
sudo chmod +x /usr/local/bin/docker-compose
|
43 |
+
- name: Run tests with Docker Compose
|
44 |
+
run: |
|
45 |
+
docker-compose --profile test run --rm gpt-researcher-tests
|
.gitignore
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#Ignore env containing secrets
|
2 |
+
.env
|
3 |
+
.venv
|
4 |
+
.envrc
|
5 |
+
|
6 |
+
#Ignore Virtual Env
|
7 |
+
env/
|
8 |
+
venv/
|
9 |
+
.venv/
|
10 |
+
|
11 |
+
# Other Environments
|
12 |
+
ENV/
|
13 |
+
env.bak/
|
14 |
+
venv.bak/
|
15 |
+
|
16 |
+
#Ignore generated outputs
|
17 |
+
outputs/
|
18 |
+
*.lock
|
19 |
+
dist/
|
20 |
+
gpt_researcher.egg-info/
|
21 |
+
|
22 |
+
#Ignore my local docs
|
23 |
+
my-docs/
|
24 |
+
|
25 |
+
#Ignore pycache
|
26 |
+
**/__pycache__/
|
27 |
+
|
28 |
+
#Ignore mypy cache
|
29 |
+
.mypy_cache/
|
30 |
+
node_modules
|
31 |
+
.idea
|
32 |
+
.DS_Store
|
33 |
+
.docusaurus
|
34 |
+
build
|
35 |
+
docs/build
|
36 |
+
|
37 |
+
.vscode/launch.json
|
38 |
+
.langgraph-data/
|
39 |
+
.next/
|
40 |
+
package-lock.json
|
41 |
+
|
42 |
+
#Vim swp files
|
43 |
+
*.swp
|
44 |
+
|
45 |
+
# Log files
|
46 |
+
logs/
|
47 |
+
*.orig
|
48 |
+
*.log
|
49 |
+
server_log.txt
|
50 |
+
|
51 |
+
#Cursor Rules
|
52 |
+
.cursorrules
|
53 |
+
CURSOR_RULES.md
|
CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Contributor Covenant Code of Conduct
|
2 |
+
|
3 |
+
## Our Pledge
|
4 |
+
|
5 |
+
We, as members, contributors, and leaders, pledge to make participation in our
|
6 |
+
community a harassment-free experience for everyone, regardless of age, body
|
7 |
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
8 |
+
identity and expression, level of experience, education, socio-economic status,
|
9 |
+
nationality, personal appearance, race, religion, sexual identity, or
|
10 |
+
orientation.
|
11 |
+
|
12 |
+
We commit to acting and interacting in ways that contribute to an open, welcoming,
|
13 |
+
diverse, inclusive, and healthy community.
|
14 |
+
|
15 |
+
## Our Standards
|
16 |
+
|
17 |
+
Examples of behavior that contributes to a positive environment for our
|
18 |
+
community include:
|
19 |
+
|
20 |
+
- Demonstrating empathy and kindness toward others
|
21 |
+
- Being respectful of differing opinions, viewpoints, and experiences
|
22 |
+
- Giving and gracefully accepting constructive feedback
|
23 |
+
- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
|
24 |
+
- Focusing on what is best not just for us as individuals, but for the
|
25 |
+
overall community
|
26 |
+
|
27 |
+
Examples of unacceptable behavior include:
|
28 |
+
|
29 |
+
- The use of sexualized language or imagery, and sexual attention or
|
30 |
+
advances of any kind
|
31 |
+
- Trolling, insulting or derogatory comments, and personal or political attacks
|
32 |
+
- Public or private harassment
|
33 |
+
- Publishing others' private information, such as a physical or email address, without their explicit permission
|
34 |
+
- Other conduct that could reasonably be considered inappropriate in a professional setting
|
35 |
+
|
36 |
+
## Enforcement Responsibilities
|
37 |
+
|
38 |
+
Community leaders are responsible for clarifying and enforcing our standards of
|
39 |
+
acceptable behavior and will take appropriate and fair corrective action in
|
40 |
+
response to any behavior deemed inappropriate, threatening, offensive,
|
41 |
+
or harmful.
|
42 |
+
|
43 |
+
Community leaders have the right and responsibility to remove, edit, or reject
|
44 |
+
comments, commits, code, wiki edits, issues, and other contributions that do not
|
45 |
+
align with this Code of Conduct, and will communicate reasons for moderation
|
46 |
+
decisions when appropriate.
|
47 |
+
|
48 |
+
## Scope
|
49 |
+
|
50 |
+
This Code of Conduct applies to all community spaces and also applies when
|
51 |
+
an individual is officially representing the community in public spaces.
|
52 |
+
Examples include using an official email address, posting via an official
|
53 |
+
social media account, or acting as an appointed representative at an online or offline event.
|
54 |
+
|
55 |
+
## Enforcement
|
56 |
+
|
57 |
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58 |
+
reported to the community leaders responsible for enforcement at
|
59 |
+
[[email protected]](mailto:[email protected]).
|
60 |
+
All complaints will be reviewed and investigated promptly and fairly.
|
61 |
+
|
62 |
+
All community leaders are obligated to respect the privacy and security of the
|
63 |
+
reporter of any incident.
|
64 |
+
|
65 |
+
## Enforcement Guidelines
|
66 |
+
|
67 |
+
Community leaders will follow these Community Impact Guidelines in determining
|
68 |
+
the consequences for any action they deem in violation of this Code of Conduct:
|
69 |
+
|
70 |
+
### 1. Correction
|
71 |
+
|
72 |
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
73 |
+
unprofessional or unwelcome in the community.
|
74 |
+
|
75 |
+
**Consequence**: A private, written warning from community leaders, providing
|
76 |
+
clarity around the nature of the violation and an explanation of why the
|
77 |
+
behavior was inappropriate. A public apology may be requested.
|
78 |
+
|
79 |
+
### 2. Warning
|
80 |
+
|
81 |
+
**Community Impact**: A violation through a single incident or series
|
82 |
+
of actions.
|
83 |
+
|
84 |
+
**Consequence**: A warning with consequences for continued behavior. No
|
85 |
+
interaction with the people involved, including unsolicited interaction with
|
86 |
+
those enforcing the Code of Conduct, for a specified period. This includes
|
87 |
+
avoiding interactions in community spaces and external channels like social media.
|
88 |
+
Violating these terms may lead to a temporary or permanent ban.
|
89 |
+
|
90 |
+
### 3. Temporary Ban
|
91 |
+
|
92 |
+
**Community Impact**: A serious violation of community standards, including
|
93 |
+
sustained inappropriate behavior.
|
94 |
+
|
95 |
+
**Consequence**: A temporary ban from any interaction or public
|
96 |
+
communication with the community for a specified period. No public or
|
97 |
+
private interaction with the people involved, including unsolicited interaction
|
98 |
+
with those enforcing the Code of Conduct, is allowed during this period.
|
99 |
+
Violating these terms may lead to a permanent ban.
|
100 |
+
|
101 |
+
### 4. Permanent Ban
|
102 |
+
|
103 |
+
**Community Impact**: Demonstrating a pattern of violation of community
|
104 |
+
standards, including sustained inappropriate behavior, harassment of an
|
105 |
+
individual, or aggression toward or disparagement of groups of individuals.
|
106 |
+
|
107 |
+
**Consequence**: A permanent ban from any public interaction within
|
108 |
+
the community.
|
109 |
+
|
110 |
+
## Attribution
|
111 |
+
|
112 |
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
113 |
+
version 2.0, available at
|
114 |
+
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
|
115 |
+
|
116 |
+
Community Impact Guidelines were inspired by [Mozilla's code of conduct
|
117 |
+
enforcement ladder](https://github.com/mozilla/diversity).
|
118 |
+
|
119 |
+
[homepage]: https://www.contributor-covenant.org
|
120 |
+
|
121 |
+
For answers to common questions about this code of conduct, see the FAQ at
|
122 |
+
https://www.contributor-covenant.org/faq. Translations are available at
|
123 |
+
https://www.contributor-covenant.org/translations.
|
CONTRIBUTING.md
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Contributing to GPT Researcher
|
2 |
+
|
3 |
+
First off, we'd like to welcome you and thank you for your interest and effort in contributing to our open-source project ❤️. Contributions of all forms are welcome—from new features and bug fixes to documentation and more.
|
4 |
+
|
5 |
+
We are on a mission to build the #1 AI agent for comprehensive, unbiased, and factual research online, and we need your support to achieve this grand vision.
|
6 |
+
|
7 |
+
Please take a moment to review this document to make the contribution process easy and effective for everyone involved.
|
8 |
+
|
9 |
+
## Reporting Issues
|
10 |
+
|
11 |
+
If you come across any issue or have an idea for an improvement, don't hesitate to create an issue on GitHub. Describe your problem in sufficient detail, providing as much relevant information as possible. This way, we can reproduce the issue before attempting to fix it or respond appropriately.
|
12 |
+
|
13 |
+
## Contributing Code
|
14 |
+
|
15 |
+
1. **Fork the repository and create your branch from `master`.**
|
16 |
+
If it’s not an urgent bug fix, branch from `master` and work on the feature or fix there.
|
17 |
+
|
18 |
+
2. **Make your changes.**
|
19 |
+
Implement your changes following best practices for coding in the project's language.
|
20 |
+
|
21 |
+
3. **Test your changes.**
|
22 |
+
Ensure that your changes pass all tests if any exist. If the project doesn’t have automated tests, test your changes manually to confirm they behave as expected.
|
23 |
+
|
24 |
+
4. **Follow the coding style.**
|
25 |
+
Ensure your code adheres to the coding conventions used throughout the project, including indentation, accurate comments, etc.
|
26 |
+
|
27 |
+
5. **Commit your changes.**
|
28 |
+
Make your Git commits informative and concise. This is very helpful for others when they look at the Git log.
|
29 |
+
|
30 |
+
6. **Push to your fork and submit a pull request.**
|
31 |
+
When your work is ready and passes tests, push your branch to your fork of the repository and submit a pull request from there.
|
32 |
+
|
33 |
+
7. **Pat yourself on the back and wait for review.**
|
34 |
+
Your work is done, congratulations! Now sit tight. The project maintainers will review your submission as soon as possible. They might suggest changes or ask for improvements. Both constructive conversation and patience are key to the collaboration process.
|
35 |
+
|
36 |
+
## Documentation
|
37 |
+
|
38 |
+
If you would like to contribute to the project's documentation, please follow the same steps: fork the repository, make your changes, test them, and submit a pull request.
|
39 |
+
|
40 |
+
Documentation is a vital part of any software. It's not just about having good code; ensuring that users and contributors understand what's going on, how to use the software, or how to contribute is crucial.
|
41 |
+
|
42 |
+
We're grateful for all our contributors, and we look forward to building the world's leading AI research agent hand-in-hand with you. Let's harness the power of open source and AI to change the world together!
|
CURSOR_RULES.md
ADDED
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
> **Note**: This is a readable copy of the `.cursorrules` file maintained for legibility. The actual rules are implemented from the `.cursorrules` file in the root directory.
|
2 |
+
|
3 |
+
# GPT-Researcher Cursor Rules
|
4 |
+
|
5 |
+
## Project Overview
|
6 |
+
This project, named GPT-Researcher, is an LLM-based autonomous agent that conducts local and web research on any topic and generates a comprehensive report with citations. It is built using Next.js and TypeScript, integrating various libraries for their strengths.
|
7 |
+
|
8 |
+
Your primary goal is to help with:
|
9 |
+
- Next.js app router patterns
|
10 |
+
- TypeScript type safety
|
11 |
+
- Tailwind CSS best practices
|
12 |
+
- Code quality standards
|
13 |
+
- Python/FastAPI backend optimizations
|
14 |
+
|
15 |
+
## Key URLs
|
16 |
+
- Project Home Page: https://gptr.dev/
|
17 |
+
- GitHub Repository: https://github.com/assafelovic/gpt-researcher
|
18 |
+
- Documentation: https://docs.gptr.dev/
|
19 |
+
|
20 |
+
## Project Structure
|
21 |
+
- Frontend user interface built with Next.js, TypeScript, and Tailwind CSS in `/frontend`
|
22 |
+
- Static FastAPI version for lightweight deployments
|
23 |
+
- Next.js version for production use with enhanced features
|
24 |
+
|
25 |
+
- Multi-agent research system using LangChain and LangGraph in `/backend/multi_agents`
|
26 |
+
- Browser, Editor, Researcher, Reviewer, Revisor, Writer, and Publisher agents
|
27 |
+
- Task configuration and agent coordination
|
28 |
+
|
29 |
+
- Document processing using Unstructured and PyMuPDF in `/backend/document_processing`
|
30 |
+
- PDF, DOCX, and web content parsing
|
31 |
+
- Text extraction and preprocessing
|
32 |
+
|
33 |
+
- Report generation using LangChain and Jinja2 templates in `/backend/report_generation`
|
34 |
+
- Template-based report structuring
|
35 |
+
- Dynamic content formatting
|
36 |
+
|
37 |
+
- Multiple output formats in `/backend/output_formats`
|
38 |
+
- PDF via md2pdf
|
39 |
+
- Markdown via mistune
|
40 |
+
- DOCX via python-docx
|
41 |
+
- Format conversion utilities
|
42 |
+
- Export functionality
|
43 |
+
|
44 |
+
- GPT Researcher core functionality in `/gpt_researcher`
|
45 |
+
- Web scraping and content aggregation
|
46 |
+
- Research planning and execution
|
47 |
+
- Source validation and tracking
|
48 |
+
- Query processing and response generation
|
49 |
+
|
50 |
+
- Testing infrastructure in `/tests`
|
51 |
+
- Unit tests for individual components
|
52 |
+
- Integration tests for agent interactions
|
53 |
+
- End-to-end research workflow tests
|
54 |
+
- Mock data and fixtures for testing
|
55 |
+
|
56 |
+
## Language Model Configuration
|
57 |
+
- Default model: gpt-4-turbo
|
58 |
+
- Alternative models: gpt-3.5-turbo, claude-3-opus
|
59 |
+
- Temperature settings for different tasks
|
60 |
+
- Context window management
|
61 |
+
- Token limit handling
|
62 |
+
- Cost optimization strategies
|
63 |
+
|
64 |
+
## Error Handling
|
65 |
+
- Research failure recovery
|
66 |
+
- API rate limiting
|
67 |
+
- Network timeout handling
|
68 |
+
- Invalid input management
|
69 |
+
- Source validation errors
|
70 |
+
- Report generation failures
|
71 |
+
|
72 |
+
## Performance
|
73 |
+
- Parallel processing strategies
|
74 |
+
- Caching mechanisms
|
75 |
+
- Memory management
|
76 |
+
- Response streaming
|
77 |
+
- Resource allocation
|
78 |
+
- Query optimization
|
79 |
+
|
80 |
+
## Development Workflow
|
81 |
+
- Branch naming conventions
|
82 |
+
- Commit message format
|
83 |
+
- PR review process
|
84 |
+
- Testing requirements
|
85 |
+
- Documentation updates
|
86 |
+
- Version control guidelines
|
87 |
+
|
88 |
+
## API Documentation
|
89 |
+
- REST endpoints
|
90 |
+
- WebSocket events
|
91 |
+
- Request/Response formats
|
92 |
+
- Authentication methods
|
93 |
+
- Rate limits
|
94 |
+
- Error codes
|
95 |
+
|
96 |
+
## Monitoring
|
97 |
+
- Performance metrics
|
98 |
+
- Error tracking
|
99 |
+
- Usage statistics
|
100 |
+
- Cost monitoring
|
101 |
+
- Research quality metrics
|
102 |
+
- User feedback tracking
|
103 |
+
|
104 |
+
## Frontend Components
|
105 |
+
- Static FastAPI version for lightweight deployments
|
106 |
+
- Next.js version for production use with enhanced features
|
107 |
+
|
108 |
+
## Backend Components
|
109 |
+
- Multi-agent system architecture
|
110 |
+
- Document processing pipeline
|
111 |
+
- Report generation system
|
112 |
+
- Output format handlers
|
113 |
+
|
114 |
+
## Core Research Components
|
115 |
+
- Web scraping and aggregation
|
116 |
+
- Research planning and execution
|
117 |
+
- Source validation
|
118 |
+
- Query processing
|
119 |
+
|
120 |
+
## Testing
|
121 |
+
- Unit tests
|
122 |
+
- Integration tests
|
123 |
+
- End-to-end tests
|
124 |
+
- Performance testing
|
125 |
+
|
126 |
+
## Rule Violation Monitoring
|
127 |
+
- Alert developer when changes conflict with project structure
|
128 |
+
- Warn about deviations from coding standards
|
129 |
+
- Flag unauthorized framework or library additions
|
130 |
+
- Monitor for security and performance anti-patterns
|
131 |
+
- Track API usage patterns that may violate guidelines
|
132 |
+
- Report TypeScript strict mode violations
|
133 |
+
- Identify accessibility compliance issues
|
134 |
+
|
135 |
+
## Development Guidelines
|
136 |
+
- Use TypeScript with strict mode enabled
|
137 |
+
- Follow ESLint and Prettier configurations
|
138 |
+
- Ensure components are responsive and accessible
|
139 |
+
- Use Tailwind CSS for styling, following the project's design system
|
140 |
+
- Minimize AI-generated comments, prefer self-documenting code
|
141 |
+
- Follow React best practices and hooks guidelines
|
142 |
+
- Validate all user inputs and API responses
|
143 |
+
- Use existing components as reference implementations
|
144 |
+
|
145 |
+
## Important Scripts
|
146 |
+
- `npm run dev`: Start development server
|
147 |
+
- `npm run build`: Build for production
|
148 |
+
- `npm run test`: Run test suite
|
149 |
+
- `python -m pytest`: Run Python tests
|
150 |
+
- `python -m uvicorn backend.server.server:app --host=0.0.0.0 --port=8000`: Start FastAPI server
|
151 |
+
- `python -m uvicorn backend.server.server:app --reload`: Start FastAPI server with auto-reload for development
|
152 |
+
- `python main.py`: Run the main application directly
|
153 |
+
- `docker-compose up`: Start all services
|
154 |
+
- `docker-compose run gpt-researcher-tests`: Run test suite in container
|
155 |
+
|
156 |
+
## AI Integration Guidelines
|
157 |
+
- Prioritize type safety in all AI interactions
|
158 |
+
- Follow LangChain and LangGraph best practices
|
159 |
+
- Implement proper error handling for AI responses
|
160 |
+
- Maintain context window limits
|
161 |
+
- Handle rate limiting and API quotas
|
162 |
+
- Validate AI outputs before processing
|
163 |
+
- Log AI interactions for debugging
|
164 |
+
|
165 |
+
## Lexicon
|
166 |
+
- **GPT Researcher**: Autonomous research agent system
|
167 |
+
- **Multi-Agent System**: Coordinated AI agents for research tasks
|
168 |
+
- **Research Pipeline**: End-to-end research workflow
|
169 |
+
- **Agent Roles**: Browser, Editor, Researcher, Reviewer, Revisor, Writer, Publisher
|
170 |
+
- **Source Validation**: Verification of research sources
|
171 |
+
- **Report Generation**: Process of creating final research output
|
172 |
+
|
173 |
+
## Additional Resources
|
174 |
+
- [Next.js Documentation](https://nextjs.org/docs)
|
175 |
+
- [TypeScript Handbook](https://www.typescriptlang.org/docs/)
|
176 |
+
- [Tailwind CSS Documentation](https://tailwindcss.com/docs)
|
177 |
+
- [LangChain Documentation](https://python.langchain.com/docs/)
|
178 |
+
- [FastAPI Documentation](https://fastapi.tiangolo.com/)
|
179 |
+
- [Project Documentation](https://docs.gptr.dev/)
|
180 |
+
|
181 |
+
_Note: End all your comments with a :-) symbol._
|
Dockerfile
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Stage 1: Browser and build tools installation
|
2 |
+
FROM python:3.11.4-slim-bullseye AS install-browser
|
3 |
+
|
4 |
+
# Install Chromium, Chromedriver, Firefox, Geckodriver, and build tools in one layer
|
5 |
+
RUN apt-get update && \
|
6 |
+
apt-get satisfy -y "chromium, chromium-driver (>= 115.0)" && \
|
7 |
+
apt-get install -y --no-install-recommends firefox-esr wget build-essential && \
|
8 |
+
wget https://github.com/mozilla/geckodriver/releases/download/v0.33.0/geckodriver-v0.33.0-linux64.tar.gz && \
|
9 |
+
tar -xvzf geckodriver-v0.33.0-linux64.tar.gz && \
|
10 |
+
chmod +x geckodriver && \
|
11 |
+
mv geckodriver /usr/local/bin/ && \
|
12 |
+
rm geckodriver-v0.33.0-linux64.tar.gz && \
|
13 |
+
chromium --version && chromedriver --version && \
|
14 |
+
rm -rf /var/lib/apt/lists/* # Clean up apt lists to reduce image size
|
15 |
+
|
16 |
+
# Stage 2: Python dependencies installation
|
17 |
+
FROM install-browser AS gpt-researcher-install
|
18 |
+
|
19 |
+
ENV PIP_ROOT_USER_ACTION=ignore
|
20 |
+
WORKDIR /usr/src/app
|
21 |
+
|
22 |
+
# Copy and install Python dependencies in a single layer to optimize cache usage
|
23 |
+
COPY ./requirements.txt ./requirements.txt
|
24 |
+
COPY ./multi_agents/requirements.txt ./multi_agents/requirements.txt
|
25 |
+
|
26 |
+
RUN pip install --no-cache-dir -r requirements.txt && \
|
27 |
+
pip install --no-cache-dir -r multi_agents/requirements.txt
|
28 |
+
|
29 |
+
# Stage 3: Final stage with non-root user and app
|
30 |
+
FROM gpt-researcher-install AS gpt-researcher
|
31 |
+
|
32 |
+
# Create a non-root user for security
|
33 |
+
RUN useradd -ms /bin/bash gpt-researcher && \
|
34 |
+
chown -R gpt-researcher:gpt-researcher /usr/src/app
|
35 |
+
|
36 |
+
USER gpt-researcher
|
37 |
+
WORKDIR /usr/src/app
|
38 |
+
|
39 |
+
# Copy the rest of the application files with proper ownership
|
40 |
+
COPY --chown=gpt-researcher:gpt-researcher ./ ./
|
41 |
+
|
42 |
+
# Expose the application's port
|
43 |
+
EXPOSE 8000
|
44 |
+
|
45 |
+
# Define the default command to run the application
|
46 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
Procfile
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
web: python -m uvicorn backend.server.server:app --host=0.0.0.0 --port=${PORT}
|
README-ja_JP.md
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
<!--<h1 style="display: flex; align-items: center; gap: 10px;">
|
3 |
+
<img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/a45bac7c-092c-42e5-8eb6-69acbf20dde5" alt="Logo" width="25">
|
4 |
+
GPT Researcher
|
5 |
+
</h1>-->
|
6 |
+
<img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
|
7 |
+
|
8 |
+
|
9 |
+
####
|
10 |
+
|
11 |
+
[](https://gptr.dev)
|
12 |
+
[](https://docs.gptr.dev)
|
13 |
+
[](https://discord.gg/QgZXvJAccX)
|
14 |
+
|
15 |
+
[](https://badge.fury.io/py/gpt-researcher)
|
16 |
+

|
17 |
+
[](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
|
18 |
+
[](https://hub.docker.com/r/gptresearcher/gpt-researcher)
|
19 |
+
[](https://twitter.com/assaf_elovic)
|
20 |
+
|
21 |
+
[English](README.md) |
|
22 |
+
[中文](README-zh_CN.md) |
|
23 |
+
[日本語](README-ja_JP.md) |
|
24 |
+
[한국어](README-ko_KR.md)
|
25 |
+
</div>
|
26 |
+
|
27 |
+
# 🔎 GPT Researcher
|
28 |
+
|
29 |
+
**GPT Researcher は、さまざまなタスクに対する包括的なオンラインリサーチのために設計された自律エージェントです。**
|
30 |
+
|
31 |
+
このエージェントは、詳細で事実に基づいた偏りのない研究レポートを生成することができ、関連するリソース、アウトライン、およびレッスンに焦点を当てるためのカスタマイズオプションを提供します。最近の [Plan-and-Solve](https://arxiv.org/abs/2305.04091) および [RAG](https://arxiv.org/abs/2005.11401) 論文に触発され、GPT Researcher は速度、決定論、および信頼性の問題に対処し、同期操作ではなく並列化されたエージェント作業を通じてより安定したパフォーマンスと高速化を提供します。
|
32 |
+
|
33 |
+
**私たちの使命は、AIの力を活用して、個人や組織に正確で偏りのない事実に基づいた情報を提供することです。**
|
34 |
+
|
35 |
+
## なぜGPT Researcherなのか?
|
36 |
+
|
37 |
+
- 手動の研究タスクで客観的な結論を形成するには時間がかかることがあり、適切なリソースと情報を見つけるのに数週間かかることもあります。
|
38 |
+
- 現在のLLMは過去の情報に基づいて訓練されており、幻覚のリスクが高く、研究タスクにはほとんど役に立ちません。
|
39 |
+
- 現在のLLMは短いトークン出力に制限されており、長く詳細な研究レポート(2,000語以上)には不十分です。
|
40 |
+
- Web検索を可能にするサービス(ChatGPT + Webプラグインなど)は、限られたリソースとコンテンツのみを考慮し、場合によっては表面的で偏った回答をもたらします。
|
41 |
+
- Webソースの選択のみを使用すると、研究タスクの正しい結論を導く際にバイアスが生じる可能性があります。
|
42 |
+
|
43 |
+
## アーキテクチャ
|
44 |
+
主なアイデアは、「プランナー」と「実行」エージェントを実行することであり、プランナーは研究する質問を生成し、実行エージェントは生成された各研究質問に基づいて最も関連性の高い情報を探します。最後に、プランナーはすべての関連情報をフィルタリングおよび集約し、研究レポートを作成します。<br /> <br />
|
45 |
+
エージェントは、研究タスクを完了するために gpt-4o-mini と gpt-4o(128K コンテキスト)の両方を活用します。必要に応じてそれぞれを使用することでコストを最適化します。**平均的な研究タスクは完了するのに約3分かかり、コストは約0.1ドルです**。
|
46 |
+
|
47 |
+
<div align="center">
|
48 |
+
<img align="center" height="500" src="https://cowriter-images.s3.amazonaws.com/architecture.png">
|
49 |
+
</div>
|
50 |
+
|
51 |
+
|
52 |
+
詳細説明:
|
53 |
+
* 研究クエリまたはタスクに基づいて特定のドメインエージェントを作成します。
|
54 |
+
* 研究タスクに対する客観的な意見を形成する一連の研究質問を生成します。
|
55 |
+
* 各研究質問に対して、与えられたタスクに関連する情報をオンラインリソースから収集するクローラーエージェントをトリガーします。
|
56 |
+
* 各収集されたリソースについて、関連情報に基づいて要約し、そのソースを追跡します。
|
57 |
+
* 最後に、すべての要約されたソースをフィルタリングおよび集約し、最終的な研究レポートを生成します。
|
58 |
+
|
59 |
+
## デモ
|
60 |
+
https://github.com/assafelovic/gpt-researcher/assets/13554167/a00c89a6-a295-4dd0-b58d-098a31c40fda
|
61 |
+
|
62 |
+
## チュートリアル
|
63 |
+
- [動作原理](https://docs.gptr.dev/blog/building-gpt-researcher)
|
64 |
+
- [インストール方法](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
|
65 |
+
- [ライブデモ](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
|
66 |
+
|
67 |
+
## 特徴
|
68 |
+
- 📝 研究、アウトライン、リソース、レッスンレポートを生成
|
69 |
+
- 🌐 各研究で20以上のWebソースを集約し、客観的で事実に基づいた結論を形成
|
70 |
+
- 🖥️ 使いやすいWebインターフェース(HTML/CSS/JS)を含む
|
71 |
+
- 🔍 JavaScriptサポート付きのWebソースをスクレイピング
|
72 |
+
- 📂 訪問および使用されたWebソースのコンテキストを追跡
|
73 |
+
- 📄 研究レポートをPDF、Wordなどにエクスポート
|
74 |
+
|
75 |
+
## 📖 ドキュメント
|
76 |
+
|
77 |
+
完全なドキュメントについては、[こちら](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started)を参照してください:
|
78 |
+
|
79 |
+
- 入門(インストール、環境設定、簡単な例)
|
80 |
+
- 操作例(デモ、統合、dockerサポート)
|
81 |
+
- 参考資料(API完全ドキュメント)
|
82 |
+
- Tavilyアプリケーションインターフェースの統合(コア概念の高度な説明)
|
83 |
+
|
84 |
+
## クイックスタート
|
85 |
+
> **ステップ 0** - Python 3.11 以降をインストールします。[こちら](https://www.tutorialsteacher.com/python/install-python)を参照して、ステップバイステップのガイドを確認してください。
|
86 |
+
|
87 |
+
<br />
|
88 |
+
|
89 |
+
> **ステップ 1** - プロジェクトをダウンロードします
|
90 |
+
|
91 |
+
```bash
|
92 |
+
$ git clone https://github.com/assafelovic/gpt-researcher.git
|
93 |
+
$ cd gpt-researcher
|
94 |
+
```
|
95 |
+
|
96 |
+
<br />
|
97 |
+
|
98 |
+
> **ステップ2** - 依存関係をインストールします
|
99 |
+
```bash
|
100 |
+
$ pip install -r requirements.txt
|
101 |
+
```
|
102 |
+
<br />
|
103 |
+
|
104 |
+
> **ステップ 3** - OpenAI キーと Tavily API キーを使用して .env ファイルを作成するか、直接エクスポートします
|
105 |
+
|
106 |
+
```bash
|
107 |
+
$ export OPENAI_API_KEY={Your OpenAI API Key here}
|
108 |
+
```
|
109 |
+
```bash
|
110 |
+
$ export TAVILY_API_KEY={Your Tavily API Key here}
|
111 |
+
```
|
112 |
+
|
113 |
+
- **LLMには、[OpenAI GPT](https://platform.openai.com/docs/guides/gpt) を使用することをお勧めします**が、[Langchain Adapter](https://python.langchain.com/docs/guides/adapters/openai) がサポートする他の LLM モデル(オープンソースを含む)を使用することもできます。llm モデルとプロバイダーを config/config.py で変更するだけです。[このガイド](https://python.langchain.com/docs/integrations/llms/) に従って、LLM を Langchain と統合する方法を学んでください。
|
114 |
+
- **検索エンジンには、[Tavily Search API](https://app.tavily.com)(LLM 用に最適化されています)を使用することをお勧めします**が、他の検索エンジンを選択することもできます。config/config.py で検索プロバイダーを「duckduckgo」、「googleAPI」、「googleSerp」、「searchapi」、「searx」に変更するだけです。次に、config.py ファイルに対応する env API キーを追加します。
|
115 |
+
- **最適なパフォーマンスを得るために、[OpenAI GPT](https://platform.openai.com/docs/guides/gpt) モデルと [Tavily Search API](https://app.tavily.com) を使用することを強くお勧めします。**
|
116 |
+
<br />
|
117 |
+
|
118 |
+
> **ステップ 4** - FastAPI を使用してエージェントを実行します
|
119 |
+
|
120 |
+
```bash
|
121 |
+
$ uvicorn main:app --reload
|
122 |
+
```
|
123 |
+
<br />
|
124 |
+
|
125 |
+
> **ステップ 5** - 任意のブラウザで http://localhost:8000 にアクセスして、リサーチを楽しんでください!
|
126 |
+
|
127 |
+
Docker の使い方や機能とサービスの詳細については、[ドキュメント](https://docs.gptr.dev) ページをご覧ください。
|
128 |
+
|
129 |
+
## 🚀 貢献
|
130 |
+
私たちは貢献を大歓迎します!興味がある場合は、[貢献](CONTRIBUTING.md) をご覧ください。
|
131 |
+
|
132 |
+
私たちの[ロードマップ](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) ページを確認し、私たちの使命に参加することに興味がある場合は、[Discord コミュニティ](https://discord.gg/QgZXvJAccX) を通じてお問い合わせください。
|
133 |
+
|
134 |
+
## ✉️ サポート / お問い合わせ
|
135 |
+
- [コミュニティディスカッション](https://discord.gg/spBgZmm3Xe)
|
136 |
+
- 私たちのメール: [email protected]
|
137 |
+
|
138 |
+
## 🛡 免責事項
|
139 |
+
|
140 |
+
このプロジェクト「GPT Researcher」は実験的なアプリケーションであり、明示または黙示のいかなる保証もなく「現状のまま」提供されます。私たちは学術目的のためにApache 2.0ライセンスの下でコードを共有しています。ここに記載されている内容は学術的なアドバイスではなく、学術論文や研究論文での使用を推奨するものではありません。
|
141 |
+
|
142 |
+
私たちの客観的な研究主張に対する見解:
|
143 |
+
1. 私たちのスクレイピングシステムの主な目的は、不正確な事実を減らすことです。どうやって解決するのか?私たちがスクレイピングするサイトが多ければ多いほど、誤ったデータの可能性は低くなります。各研究で20の情報を収集し、それらがすべて間違っている可能性は非常に低いです。
|
144 |
+
2. 私たちの目標はバイアスを排除することではなく、可能な限りバイアスを減らすことです。**私たちはここでコミュニティとして最も効果的な人間と機械の相互作用を探求しています**。
|
145 |
+
3. 研究プロセスでは、人々も自分が研究しているトピックに対してすでに意見を持っているため、バイアスがかかりやすいです。このツールは多くの意見を収集し、偏った人が決して読まないであろう多様な見解を均等に説明します。
|
146 |
+
|
147 |
+
**GPT-4 言語モデルの使用は、トークンの使用により高額な費用がかかる可能性があることに注意してください**。このプロジェクトを利用することで、トークンの使用状況と関連する費用を監視および管理する責任があることを認めたことになります。OpenAI API の使用状況を定期的に確認し、予期しない料金が発生しないように必要な制限やアラートを設定することを強くお勧めします。
|
148 |
+
|
149 |
+
---
|
150 |
+
|
151 |
+
<p align="center">
|
152 |
+
<a href="https://star-history.com/#assafelovic/gpt-researcher">
|
153 |
+
<picture>
|
154 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
|
155 |
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
156 |
+
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
157 |
+
</picture>
|
158 |
+
</a>
|
159 |
+
</p>
|
README-ko_KR.md
ADDED
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
<!--<h1 style="display: flex; align-items: center; gap: 10px;">
|
3 |
+
<img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/a45bac7c-092c-42e5-8eb6-69acbf20dde5" alt="Logo" width="25">
|
4 |
+
GPT Researcher
|
5 |
+
</h1>-->
|
6 |
+
<img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
|
7 |
+
|
8 |
+
|
9 |
+
####
|
10 |
+
|
11 |
+
[](https://gptr.dev)
|
12 |
+
[](https://docs.gptr.dev)
|
13 |
+
[](https://discord.gg/QgZXvJAccX)
|
14 |
+
|
15 |
+
[](https://badge.fury.io/py/gpt-researcher)
|
16 |
+

|
17 |
+
[](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
|
18 |
+
[](https://hub.docker.com/r/gptresearcher/gpt-researcher)
|
19 |
+
[](https://twitter.com/assaf_elovic)
|
20 |
+
|
21 |
+
[English](README.md) |
|
22 |
+
[中文](README-zh_CN.md) |
|
23 |
+
[日本語](README-ja_JP.md) |
|
24 |
+
[한국어](README-ko_KR.md)
|
25 |
+
</div>
|
26 |
+
|
27 |
+
# 🔎 GPT Researcher
|
28 |
+
|
29 |
+
**GPT Researcher는 다양한 작업에 대해 포괄적인 온라인 연구를 수행하도록 설계된 자율 에이전트입니다.**
|
30 |
+
|
31 |
+
이 에이전트는 세부적이고 사실에 기반하며 편견 없는 연구 보고서를 생성할 수 있으며, 관련 리소스와 개요에 초점을 맞춘 맞춤형 옵션을 제공합니다. 최근 발표된 [Plan-and-Solve](https://arxiv.org/abs/2305.04091) 및 [RAG](https://arxiv.org/abs/2005.11401) 논문에서 영감을 받아 GPT Researcher는 잘못된 정보, 속도, 결정론적 접근 방식, 신뢰성 문제를 해결하고, 동기화 작업이 아닌 병렬 에이전트 작업을 통해 더 안정적이고 빠른 성능을 제공합니다.
|
32 |
+
|
33 |
+
**우리의 목표는 AI의 힘을 활용하여 개인과 조직에게 정확하고 편향 없는 사실에 기반한 정보를 제공하는 것입니다.**
|
34 |
+
|
35 |
+
## 왜 GPT Researcher인가?
|
36 |
+
|
37 |
+
- 직접 수행하는 연구 과정은 객관적인 결론을 도출하는 데 시간이 오래 걸리며, 적절한 리소스와 정보를 찾는 데 몇 주가 걸릴 수 있습니다.
|
38 |
+
- 현재의 대규모 언어 모델(LLM)은 과거 정보에 기반해 훈련되었으며, 환각 현상이 발생할 위험이 높아 연구 작업에는 적합하지 않습니다.
|
39 |
+
- 현재 LLM은 짧은 토큰 출력으로 제한되며, 2,000단어 이상의 길고 자세한 연구 보고서를 작성하는 데는 충분하지 않습니다.
|
40 |
+
- 웹 검색을 지원하는 서비스(예: ChatGPT 또는 Perplexity)는 제한된 리소스와 콘텐츠만을 고려하여 경우에 따라 피상적이고 편향된 답변을 제공합니다.
|
41 |
+
- 웹 소스만을 사용하면 연구 작업에서 올바른 결론을 도출할 때 편향이 발생할 수 있습니다.
|
42 |
+
|
43 |
+
## 데모
|
44 |
+
https://github.com/user-attachments/assets/092e9e71-7e27-475d-8c4f-9dddd28934a3
|
45 |
+
|
46 |
+
## 아키텍처
|
47 |
+
주요 아이디어는 "플래너"와 "실행" 에이전트를 실행하는 것으로, 플래너는 연구할 질문을 생성하고, 실행 에이전트는 생성된 각 연구 질문에 따라 가장 관련성 높은 정보를 찾습니다. 마지막으로 플래너는 모든 관련 정보를 필터링하고 집계하여 연구 보고서를 작성합니다.
|
48 |
+
<br /> <br />
|
49 |
+
에이전트는 `gpt-4o-mini`와 `gpt-4o`(128K 컨텍스트)를 활용하여 연구 작업을 완료합니다. 필요에 따라 각각을 사용하여 비용을 최적화합니다. **평균 연구 작업은 약 2분이 소요되며, 비용은 약 $0.005입니다.**
|
50 |
+
|
51 |
+
<div align="center">
|
52 |
+
<img align="center" height="600" src="https://github.com/assafelovic/gpt-researcher/assets/13554167/4ac896fd-63ab-4b77-9688-ff62aafcc527">
|
53 |
+
</div>
|
54 |
+
|
55 |
+
구체적으로:
|
56 |
+
* 연구 쿼리 또는 작업을 기반으로 도메인별 에이전트를 생성합니다.
|
57 |
+
* 주어진 작업에 대해 객관적인 의견을 형성할 수 있는 일련의 연구 질문을 생성합니다.
|
58 |
+
* 각 연구 질문에 대해 크롤러 에이전트를 실행하여 작업과 관련된 정보를 온라인 리소스에서 수집합니다.
|
59 |
+
* 수집된 각 리소스에서 관련 정보를 요약하고 출처를 기록합니다.
|
60 |
+
* 마지막으로, 요약된 모든 정보를 필터링하고 집계하여 최종 연구 보고서를 생성합니다.
|
61 |
+
|
62 |
+
## 튜토리얼
|
63 |
+
- [동작원리](https://docs.gptr.dev/blog/building-gpt-researcher)
|
64 |
+
- [설치방법](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
|
65 |
+
- [라이브 데모](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
|
66 |
+
|
67 |
+
|
68 |
+
## 기능
|
69 |
+
- 📝 로컬 문서 및 웹 소스를 사용하여 연구, 개요, 리소스 및 학습 보고서 생성
|
70 |
+
- 📜 2,000단어 이상의 길고 상세한 연구 보고서 생성 가능
|
71 |
+
- 🌐 연구당 20개 이상의 웹 소스를 집계하여 객관적이고 사실에 기반한 결론 도출
|
72 |
+
- 🖥️ 경량 HTML/CSS/JS와 프로덕션용 (NextJS + Tailwind) UX/UI 포함
|
73 |
+
- 🔍 자바스크립트 지원 웹 소스 스크래핑 기능
|
74 |
+
- 📂 연구 과정에서 맥락과 메모리 추적 및 유지
|
75 |
+
- 📄 연구 보고서를 PDF, Word 등으로 내보내기 지원
|
76 |
+
|
77 |
+
## 📖 문서
|
78 |
+
|
79 |
+
전체 문서(설치, 환경 설정, 간단한 예시)를 보려면 [여기](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started)를 참조하세요.
|
80 |
+
|
81 |
+
- 시작하기 (설치, 환경 설정, 간단한 예시)
|
82 |
+
- 맞춤 설정 및 구성
|
83 |
+
- 사용 방법 예시 (데모, 통합, 도커 지원)
|
84 |
+
- 참고자료 (전체 API 문서)
|
85 |
+
|
86 |
+
## ⚙️ 시작하기
|
87 |
+
### 설치
|
88 |
+
> **1단계** - Python 3.11 또는 그 이상의 버전을 설치하세요. [여기](https://www.tutorialsteacher.com/python/install-python)를 참조하여 단계별 가이드를 확인하세요.
|
89 |
+
|
90 |
+
> **2단계** - 프로젝트를 다운로드하고 해당 디렉토리로 이동하세요.
|
91 |
+
|
92 |
+
```bash
|
93 |
+
git clone https://github.com/assafelovic/gpt-researcher.git
|
94 |
+
cd gpt-researcher
|
95 |
+
```
|
96 |
+
|
97 |
+
> **3단계** - 두 가지 방법으로 API 키를 설정하세요: 직접 export하거나 `.env` 파일에 저장하세요.
|
98 |
+
|
99 |
+
Linux/Windows에서 임시 설정을 하려면 export 방법을 사용하세요:
|
100 |
+
|
101 |
+
```bash
|
102 |
+
export OPENAI_API_KEY={OpenAI API 키 입력}
|
103 |
+
export TAVILY_API_KEY={Tavily API 키 입력}
|
104 |
+
```
|
105 |
+
|
106 |
+
더 영구적인 설정을 원한다면, 현재의 `gpt-researcher` 디렉토리에 `.env` 파일을 생성하고 환경 변수를 입력하세요 (export 없이).
|
107 |
+
|
108 |
+
- 기본 LLM은 [GPT](https://platform.openai.com/docs/guides/gpt)이지만, `claude`, `ollama3`, `gemini`, `mistral` 등 다른 LLM도 사용할 수 있습니다. LLM 제공자를 변경하는 방법은 [LLMs 문서](https://docs.gptr.dev/docs/gpt-researcher/llms/llms)를 참조하세요. 이 프로젝트는 OpenAI GPT 모델에 최적화되어 있습니다.
|
109 |
+
- 기본 검색기는 [Tavily](https://app.tavily.com)이지만, `duckduckgo`, `google`, `bing`, `searchapi`, `serper`, `searx`, `arxiv`, `exa` 등의 검색기를 사용할 수 있습니다. 검색 제공자를 변경하는 방법은 [검색기 문서](https://docs.gptr.dev/docs/gpt-researcher/retrievers)를 참조하세요.
|
110 |
+
|
111 |
+
### 빠른 시작
|
112 |
+
|
113 |
+
> **1단계** - 필요한 종속성 설치
|
114 |
+
|
115 |
+
```bash
|
116 |
+
pip install -r requirements.txt
|
117 |
+
```
|
118 |
+
|
119 |
+
> **2단계** - FastAPI로 에이전트 실행
|
120 |
+
|
121 |
+
```bash
|
122 |
+
python -m uvicorn main:app --reload
|
123 |
+
```
|
124 |
+
|
125 |
+
> **3단계** - 브라우저에서 http://localhost:8000 으로 이동하여 연구를 시작하세요!
|
126 |
+
|
127 |
+
<br />
|
128 |
+
|
129 |
+
**[Poetry](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started#poetry) 또는 [가상 환경](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started#virtual-environment)에 대해 배우고 싶다면, [문서](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started)를 참조하세요.**
|
130 |
+
|
131 |
+
### PIP 패키지로 실행하기
|
132 |
+
```bash
|
133 |
+
pip install gpt-researcher
|
134 |
+
```
|
135 |
+
|
136 |
+
```python
|
137 |
+
...
|
138 |
+
from gpt_researcher import GPTResearcher
|
139 |
+
|
140 |
+
query = "왜 Nvidia 주식이 오르고 있나요?"
|
141 |
+
researcher = GPTResearcher(query=query, report_type="research_report")
|
142 |
+
# 주어진 질문에 대한 연구 수행
|
143 |
+
research_result = await researcher.conduct_research()
|
144 |
+
# 보고서 작성
|
145 |
+
report = await researcher.write_report()
|
146 |
+
...
|
147 |
+
```
|
148 |
+
|
149 |
+
**더 많은 예제와 구성 옵션은 [PIP 문서](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package)를 참조하세요.**
|
150 |
+
|
151 |
+
## Docker로 실행
|
152 |
+
|
153 |
+
> **1단계** - [Docker 설치](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started-with-docker)
|
154 |
+
|
155 |
+
> **2단계** - `.env.example` 파일을 복사하고 API 키를 추가한 후, 파일을 `.env`로 저장하세요.
|
156 |
+
|
157 |
+
> **3단계** - docker-compose 파일에서 실행하고 싶지 않은 서비스를 주석 처리하세요.
|
158 |
+
|
159 |
+
```bash
|
160 |
+
$ docker-compose up --build
|
161 |
+
```
|
162 |
+
|
163 |
+
> **4단계** - docker-compose 파일에서 아무 것도 주석 처리하지 않았다면, 기본적으로 두 가지 프로세스가 시작됩니다:
|
164 |
+
- localhost:8000에서 실행 중인 Python 서버<br>
|
165 |
+
- localhost:3000에서 실행 중인 React 앱<br>
|
166 |
+
|
167 |
+
브라우저에서 localhost:3000으로 이동하여 연구를 시작하세요!
|
168 |
+
|
169 |
+
## 📄 로컬 문서로 연구하기
|
170 |
+
|
171 |
+
GPT Researcher를 사용하여 로컬 문서를 기반으로 연구 작업을 수행할 수 있습니다. 현재 지원되는 파일 형식은 PDF, 일반 텍스트, CSV, Excel, Markdown, PowerPoint, Word 문서입니다.
|
172 |
+
|
173 |
+
1단계: `DOC_PATH` 환경 변수를 설정하여 문서가 있는 폴더를 지정하세요.
|
174 |
+
|
175 |
+
```bash
|
176 |
+
export DOC_PATH="./my-docs"
|
177 |
+
```
|
178 |
+
|
179 |
+
2단계:
|
180 |
+
- 프론트엔드 앱을 localhost:8000에서 실행 중이라면, "Report Source" 드롭다운 옵션에서 "My Documents"를 선택하세요.
|
181 |
+
- GPT Researcher를 [PIP 패키지](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package)로 실행 중이라면, `report_source` 인수를 "local"로 설정하여 `GPTResearcher` 클래스를 인스턴스화하세요. [코드 예제](https://docs.gptr.dev/docs/gpt-researcher/context/tailored-research)를 참조하세요.
|
182 |
+
|
183 |
+
## 👪 다중 에이전트 어시스턴트
|
184 |
+
|
185 |
+
AI가 프롬프트 엔지니어링 및 RAG에서 다중 에이전트 시스템으로 발전함에 따라, 우리는 [LangGraph](https://python.langchain.com/v0.1/docs/langgraph/)로 구축된 새로운 다중 에이전트 어시스턴트를 소개합니다.
|
186 |
+
|
187 |
+
LangGraph를 사용하면 여러 에이전트의 전문 기술을 활용하여 연구 과정의 깊이와 질을 크게 향상시킬 수 있습니다. 최근 [STORM](https://arxiv.org/abs/2402.14207) 논문에서 영감을 받아, 이 프로젝트는 AI 에이전트 팀이 주제에 대한 연구를 계획에서 출판까지 함께 수행하는 방법을 보여줍니다.
|
188 |
+
|
189 |
+
평균 실행은 5-6 페이지 분량의 연구 보고서를 PDF, Docx, Markdown 형식으로 생성합니다.
|
190 |
+
|
191 |
+
[여기](https://github.com/assafelovic/gpt-researcher/tree/master/multi_agents)에서 확인하거나 [문서](https://docs.gptr.dev/docs/gpt-researcher/multi_agents/langgraph)에서 자세한 내용을 참조하세요.
|
192 |
+
|
193 |
+
## 🖥️ 프론트엔드 애플리케이션
|
194 |
+
|
195 |
+
GPT-Researcher는 사용자 경험을 개선하고 연구 프로세스를 간소화하기 위해 향상된 프론트엔드를 제공합니다. 프론트엔드는 다음과 같은 기능을 제공합니다:
|
196 |
+
|
197 |
+
- 연구 쿼리를 입력할 수 있는 직관적인 인터페이스
|
198 |
+
- 연구 작업의 실시간 진행 상황 추적
|
199 |
+
- 연구 결과의 대화형 디스플레이
|
200 |
+
- 맞춤형 연구 경험을 위한 설정 가능
|
201 |
+
|
202 |
+
두 가지 배포 옵션이 있습니다:
|
203 |
+
1. FastAPI로 제공되는 경량 정적 프론트엔드
|
204 |
+
2. 고급 기능을 제공하는 NextJS 애플리케이션
|
205 |
+
|
206 |
+
프론트엔드 기능에 대한 자세한 설치 방법 및 정보를 원하시면 [문서 페이지](https://docs.gptr.dev/docs/gpt-researcher/frontend/frontend)를 참조하세요.
|
207 |
+
|
208 |
+
## 🚀 기여하기
|
209 |
+
우리는 기여를 적극 환영합니다! 관심이 있다면 [기여 가이드](https://github.com/assafelovic/gpt-researcher/blob/master/CONTRIBUTING.md)를 확인해 주세요.
|
210 |
+
|
211 |
+
[로드맵](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) 페이지를 확인하고, 우리 [Discord 커뮤니티](https://discord.gg/QgZXvJAccX)에 가입하여 우리의 목표에 함께 참여해 주세요.
|
212 |
+
<a href="https://github.com/assafelovic/gpt-researcher/graphs/contributors">
|
213 |
+
<img src="https://contrib.rocks/image?repo=assafelovic/gpt-researcher" />
|
214 |
+
</a>
|
215 |
+
|
216 |
+
## ✉️ 지원 / 문의
|
217 |
+
- [커뮤니티 Discord](https://discord.gg/spBgZmm3Xe)
|
218 |
+
- 저자 이메일: [email protected]
|
219 |
+
|
220 |
+
## 🛡️ 면책 조항
|
221 |
+
|
222 |
+
이 프로젝트인 GPT Researcher는 실험적인 응용 프로그램이며, 명시적이거나 묵시적인 보증 없이 "있는 그대로" 제공됩니다. 우리는 이 코드를 학술적 목적으로 Apache 2 라이선스 하에 공유하고 있습니다. 여기에 있는 것은 학술적 조언이 아니며, 학술 또는 연구 논문에 사용하는 것을 권장하지 않습니다.
|
223 |
+
|
224 |
+
편향되지 않은 연구 주장에 대한 우리의 견해:
|
225 |
+
1. GPT Researcher의 주요 목표는 잘못된 정보와 편향된 사실을 줄이는 것입니다. 그 방법은 무엇일까요? 우리는 더 많은 사이트를 스크래핑할수록 잘못된 데이터의 가능성이 줄어든다고 가정합니다. 여러 사이트에서 정보를 스크래핑하고 가장 빈번한 정보를 선택하면, 모든 정보가 틀릴 확률은 매우 낮습니다.
|
226 |
+
2. 우리는 편향을 완전히 제거하려고 하지는 않지만, 가능한 한 줄이는 것을 목표로 합니다. **우리는 인간과 LLM의 가장 효과적인 상호작용을 찾기 위한 커뮤니티입니다.**
|
227 |
+
3. 연구에서 사람들도 이미 자신이 연구하는 주제에 대해 의견을 가지고 있기 때문에 편향되는 경향이 있습니다. 이 도구는 많은 의견을 스크래핑하며, 편향된 사람이라면 결코 읽지 않았을 다양한 견해를 고르게 설명합니다.
|
228 |
+
|
229 |
+
**GPT-4 모델을 사용할 경우, 토큰 사용량 때문에 비용이 많이 들 수 있습니다.** 이 프로젝트를 사용하는 경우, 자신의 토큰 사용량 및 관련 비용을 모니터링하고 관리하는 것은 본인의 책임입니다. OpenAI API 사용량을 정기적으로 확인하고, 예상치 못한 비용을 방지하기 위해 필요한 한도를 설정하거나 알림을 설정하는 것이 좋습니다.
|
230 |
+
|
231 |
+
|
232 |
+
---
|
233 |
+
|
234 |
+
<p align="center">
|
235 |
+
<a href="https://star-history.com/#assafelovic/gpt-researcher">
|
236 |
+
<picture>
|
237 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
|
238 |
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
239 |
+
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
240 |
+
</picture>
|
241 |
+
</a>
|
242 |
+
</p>
|
README-zh_CN.md
ADDED
@@ -0,0 +1,158 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center">
|
2 |
+
<!--<h1 style="display: flex; align-items: center; gap: 10px;">
|
3 |
+
<img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/a45bac7c-092c-42e5-8eb6-69acbf20dde5" alt="Logo" width="25">
|
4 |
+
GPT Researcher
|
5 |
+
</h1>-->
|
6 |
+
<img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
|
7 |
+
|
8 |
+
|
9 |
+
####
|
10 |
+
|
11 |
+
[](https://gptr.dev)
|
12 |
+
[](https://docs.gptr.dev)
|
13 |
+
[](https://discord.gg/QgZXvJAccX)
|
14 |
+
|
15 |
+
[](https://badge.fury.io/py/gpt-researcher)
|
16 |
+

|
17 |
+
[](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
|
18 |
+
[](https://hub.docker.com/r/gptresearcher/gpt-researcher)
|
19 |
+
[](https://twitter.com/assaf_elovic)
|
20 |
+
|
21 |
+
[English](README.md) |
|
22 |
+
[中文](README-zh_CN.md) |
|
23 |
+
[日本語](README-ja_JP.md) |
|
24 |
+
[한국어](README-ko_KR.md)
|
25 |
+
</div>
|
26 |
+
|
27 |
+
# 🔎 GPT Researcher
|
28 |
+
|
29 |
+
**GPT Researcher 是一个智能体代理,专为各种任务的综合在线研究而设计。**
|
30 |
+
|
31 |
+
代理可以生成详细、正式且客观的研究报告,并提供自定义选项,专注于相关资源、结构框架和经验报告。受最近发表的[Plan-and-Solve](https://arxiv.org/abs/2305.04091) 和[RAG](https://arxiv.org/abs/2005.11401) 论文的启发,GPT Researcher 解决了速度、确定性和可靠性等问题,通过并行化的代理运行,而不是同步操作,提供了更稳定的性能和更高的速度。
|
32 |
+
|
33 |
+
**我们的使命是利用人工智能的力量,为个人和组织提供准确、客观和事实的信息。**
|
34 |
+
|
35 |
+
## 为什么选择GPT Researcher?
|
36 |
+
|
37 |
+
- 因为人工研究任务形成客观结论可能需要时间和精力,有时甚至需要数周才能找到正确的资源和信息。
|
38 |
+
- 目前的LLM是根据历史和过时的信息进行训练的,存在严重的幻觉风险,因此几乎无法胜任研究任务。
|
39 |
+
- 网络搜索的解决方案(例如 ChatGPT + Web 插件)仅考虑有限的资源和内容,在某些情况下会导致肤浅的结论或不客观的答案。
|
40 |
+
- 只使用部分资源可能会在确定研究问题或任务的正确结论时产生偏差。
|
41 |
+
|
42 |
+
## 架构
|
43 |
+
主要思想是运行“**计划者**”和“**执行**”代理,而**计划者**生成问题进行研究,“**执行**”代理根据每个生成的研究问题寻找最相关的信息。最后,“**计划者**”过滤和聚合所有相关信息并创建研究报告。<br /> <br />
|
44 |
+
代理同时利用 gpt-4o-mini 和 gpt-4o(128K 上下文)来完成一项研究任务。我们仅在必要时使用这两种方法对成本进行优化。**研究任务平均耗时约 3 分钟,成本约为 ~0.1 美元**。
|
45 |
+
|
46 |
+
<div align="center">
|
47 |
+
<img align="center" height="500" src="https://cowriter-images.s3.amazonaws.com/architecture.png">
|
48 |
+
</div>
|
49 |
+
|
50 |
+
|
51 |
+
详细说明:
|
52 |
+
* 根据研究搜索或任务创建特定领域的代理。
|
53 |
+
* 生成一组研究问题,这些问题的答案共同形成对给定任务的客观意见。
|
54 |
+
* 针对每个研究问题,触发一个爬虫代理,从在线资源中搜索与给定任务相关的信息。
|
55 |
+
* 对于每一个抓取的资源,根据相关信息进行汇总,并跟踪其来源。
|
56 |
+
* 最后,对所有汇总的资料来源进行过滤和汇总,并生成最终研究报告。
|
57 |
+
|
58 |
+
## 演示
|
59 |
+
https://github.com/assafelovic/gpt-researcher/assets/13554167/a00c89a6-a295-4dd0-b58d-098a31c40fda
|
60 |
+
|
61 |
+
## 教程
|
62 |
+
- [运行原理](https://docs.gptr.dev/blog/building-gpt-researcher)
|
63 |
+
- [如何安装](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
|
64 |
+
- [现场演示](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
|
65 |
+
|
66 |
+
## 特性
|
67 |
+
- 📝 生成研究问题、大纲、资源和课题报告
|
68 |
+
- 🌐 每项研究汇总超过20个网络资源,形成客观和真实的结论
|
69 |
+
- 🖥️ 包括易于使用的web界面 (HTML/CSS/JS)
|
70 |
+
- 🔍 支持JavaScript网络资源抓取功能
|
71 |
+
- 📂 追踪访问过和使用过的网络资源和来源
|
72 |
+
- 📄 将研究报告导出为PDF或其他格式...
|
73 |
+
|
74 |
+
## 📖 文档
|
75 |
+
|
76 |
+
请参阅[此处](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started),了解完整文档:
|
77 |
+
|
78 |
+
- 入门(安装、设置环境、简单示例)
|
79 |
+
- 操作示例(演示、集成、docker 支持)
|
80 |
+
- 参考资料(API完整文档)
|
81 |
+
- Tavily 应用程序接口集成(核心概念的高级解释)
|
82 |
+
|
83 |
+
## 快速开始
|
84 |
+
> **步骤 0** - 安装 Python 3.11 或更高版本。[参见此处](https://www.tutorialsteacher.com/python/install-python) 获取详细指南。
|
85 |
+
|
86 |
+
<br />
|
87 |
+
|
88 |
+
> **步骤 1** - 下载项目
|
89 |
+
|
90 |
+
```bash
|
91 |
+
$ git clone https://github.com/assafelovic/gpt-researcher.git
|
92 |
+
$ cd gpt-researcher
|
93 |
+
```
|
94 |
+
|
95 |
+
<br />
|
96 |
+
|
97 |
+
> **步骤 2** - 安装依赖项
|
98 |
+
```bash
|
99 |
+
$ pip install -r requirements.txt
|
100 |
+
```
|
101 |
+
<br />
|
102 |
+
|
103 |
+
> **步骤 3** - 使用 OpenAI 密钥和 Tavily API 密钥创建 .env 文件,或直接导出这些密钥
|
104 |
+
|
105 |
+
```bash
|
106 |
+
$ export OPENAI_API_KEY={Your OpenAI API Key here}
|
107 |
+
```
|
108 |
+
```bash
|
109 |
+
$ export TAVILY_API_KEY={Your Tavily API Key here}
|
110 |
+
```
|
111 |
+
|
112 |
+
- **LLM,我们推荐使用 [OpenAI GPT](https://platform.openai.com/docs/guides/gpt)**,但您也可以使用 [Langchain Adapter](https://python.langchain.com/docs/guides/adapters/openai) 支持的任何其他 LLM 模型(包括开源),只需在 config/config.py 中更改 llm 模型和提供者即可。请按照 [这份指南](https://python.langchain.com/docs/integrations/llms/) 学习如何将 LLM 与 Langchain 集成。
|
113 |
+
- **对于搜索引擎,我们推荐使用 [Tavily Search API](https://app.tavily.com)(已针对 LLM 进行优化)**,但您也可以选择其他搜索引擎,只需将 config/config.py 中的搜索提供程序更改为 "duckduckgo"、"googleAPI"、"searchapi"、"googleSerp" 或 "searx" 即可。然后在 config.py 文件中添加相应的 env API 密钥。
|
114 |
+
- **我们强烈建议使用 [OpenAI GPT](https://platform.openai.com/docs/guides/gpt) 模型和 [Tavily Search API](https://app.tavily.com) 以获得最佳性能。**
|
115 |
+
<br />
|
116 |
+
|
117 |
+
> **步骤 4** - 使用 FastAPI 运行代理
|
118 |
+
|
119 |
+
```bash
|
120 |
+
$ uvicorn main:app --reload
|
121 |
+
```
|
122 |
+
<br />
|
123 |
+
|
124 |
+
> **步骤 5** - 在任何浏览器上访问 http://localhost:8000,享受研究乐趣!
|
125 |
+
|
126 |
+
要了解如何开始使用 Docker 或了解有关功能和服务的更多信息,请访问 [documentation](https://docs.gptr.dev) 页面。
|
127 |
+
|
128 |
+
## 🚀 贡献
|
129 |
+
我们非常欢迎您的贡献!如果您感兴趣,请查看 [contributing](CONTRIBUTING.md)。
|
130 |
+
|
131 |
+
如果您有兴趣加入我们的任务,请查看我们的 [路线图](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) 页面,并通过我们的 [Discord 社区](https://discord.gg/QgZXvJAccX) 联系我们。
|
132 |
+
|
133 |
+
## ✉️ 支持 / 联系我们
|
134 |
+
- [社区讨论区](https://discord.gg/spBgZmm3Xe)
|
135 |
+
- 我们的邮箱: [email protected]
|
136 |
+
|
137 |
+
## 🛡 免责声明
|
138 |
+
|
139 |
+
本项目 "GPT Researcher" 是一个实验性应用程序,按 "现状" 提供,不做任何明示或暗示的保证。我们根据 Apache 2 许可分享用于学术目的的代码。本文不提供任何学术建议,也不建议在学术或研究论文中使用。
|
140 |
+
|
141 |
+
我们对客观研究主张的看法:
|
142 |
+
1. 我们抓取系统的全部目的是减少不正确的事实。如何解决?我们抓取的网站越多,错误数据的可能性就越小。我们每项研究都会收集20条信息,它们全部错误的可能性极低。
|
143 |
+
2. 我们的目标不是消除偏见,而是尽可能减少偏见。**作为一个社区,我们在这里探索最有效的人机互动**。
|
144 |
+
3. 在研究过程中,人们也容易产生偏见,因为大多数人对自己研究的课题都有自己的看法。这个工具可以搜罗到许多观点,并均匀地解释各种不同的观点,而有偏见的人是绝对读不到这些观点的。
|
145 |
+
|
146 |
+
**请注意,使用 GPT-4 语言模型可能会因使用令牌而产生高昂费用**。使用本项目即表示您承认有责任监控和管理自己的令牌使用情况及相关费用。强烈建议您定期检查 OpenAI API 的使用情况,并设置任何必要的限制或警报,以防止发生意外费用。
|
147 |
+
|
148 |
+
---
|
149 |
+
|
150 |
+
<p align="center">
|
151 |
+
<a href="https://star-history.com/#assafelovic/gpt-researcher">
|
152 |
+
<picture>
|
153 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
|
154 |
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
155 |
+
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
156 |
+
</picture>
|
157 |
+
</a>
|
158 |
+
</p>
|
README.md
CHANGED
@@ -1,11 +1,231 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<div align="center" id="top">
|
2 |
+
|
3 |
+
<img src="https://github.com/assafelovic/gpt-researcher/assets/13554167/20af8286-b386-44a5-9a83-3be1365139c3" alt="Logo" width="80">
|
4 |
+
|
5 |
+
####
|
6 |
+
|
7 |
+
[](https://gptr.dev)
|
8 |
+
[](https://docs.gptr.dev)
|
9 |
+
[](https://discord.gg/QgZXvJAccX)
|
10 |
+
|
11 |
+
[](https://badge.fury.io/py/gpt-researcher)
|
12 |
+

|
13 |
+
[](https://colab.research.google.com/github/assafelovic/gpt-researcher/blob/master/docs/docs/examples/pip-run.ipynb)
|
14 |
+
[](https://hub.docker.com/r/gptresearcher/gpt-researcher)
|
15 |
+
[](https://twitter.com/assaf_elovic)
|
16 |
+
|
17 |
+
[English](README.md) | [中文](README-zh_CN.md) | [日本語](README-ja_JP.md) | [한국어](README-ko_KR.md)
|
18 |
+
|
19 |
+
</div>
|
20 |
+
|
21 |
+
# 🔎 GPT Researcher
|
22 |
+
|
23 |
+
**GPT Researcher is an autonomous agent designed for comprehensive web and local research on any given task.**
|
24 |
+
|
25 |
+
The agent produces detailed, factual, and unbiased research reports with citations. GPT Researcher provides a full suite of customization options to create tailor-made and domain-specific research agents. Inspired by the recent [Plan-and-Solve](https://arxiv.org/abs/2305.04091) and [RAG](https://arxiv.org/abs/2005.11401) papers, GPT Researcher addresses misinformation, speed, determinism, and reliability by offering stable performance and increased speed through parallelized agent work.
|
26 |
+
|
27 |
+
**Our mission is to empower individuals and organizations with accurate, unbiased, and factual information through AI.**
|
28 |
+
|
29 |
+
## Why GPT Researcher?
|
30 |
+
|
31 |
+
- Objective conclusions for manual research can take weeks, requiring vast resources and time.
|
32 |
+
- LLMs trained on outdated information can hallucinate, becoming irrelevant for current research tasks.
|
33 |
+
- Current LLMs have token limitations, insufficient for generating long research reports.
|
34 |
+
- Limited web sources in existing services lead to misinformation and shallow results.
|
35 |
+
- Selective web sources can introduce bias into research tasks.
|
36 |
+
|
37 |
+
## Demo
|
38 |
+
https://github.com/user-attachments/assets/2cc38f6a-9f66-4644-9e69-a46c40e296d4
|
39 |
+
|
40 |
+
## Architecture
|
41 |
+
|
42 |
+
The core idea is to utilize 'planner' and 'execution' agents. The planner generates research questions, while the execution agents gather relevant information. The publisher then aggregates all findings into a comprehensive report.
|
43 |
+
|
44 |
+
<div align="center">
|
45 |
+
<img align="center" height="600" src="https://github.com/assafelovic/gpt-researcher/assets/13554167/4ac896fd-63ab-4b77-9688-ff62aafcc527">
|
46 |
+
</div>
|
47 |
+
|
48 |
+
Steps:
|
49 |
+
* Create a task-specific agent based on a research query.
|
50 |
+
* Generate questions that collectively form an objective opinion on the task.
|
51 |
+
* Use a crawler agent for gathering information for each question.
|
52 |
+
* Summarize and source-track each resource.
|
53 |
+
* Filter and aggregate summaries into a final research report.
|
54 |
+
|
55 |
+
## Tutorials
|
56 |
+
- [How it Works](https://docs.gptr.dev/blog/building-gpt-researcher)
|
57 |
+
- [How to Install](https://www.loom.com/share/04ebffb6ed2a4520a27c3e3addcdde20?sid=da1848e8-b1f1-42d1-93c3-5b0b9c3b24ea)
|
58 |
+
- [Live Demo](https://www.loom.com/share/6a3385db4e8747a1913dd85a7834846f?sid=a740fd5b-2aa3-457e-8fb7-86976f59f9b8)
|
59 |
+
|
60 |
+
## Features
|
61 |
+
|
62 |
+
- 📝 Generate detailed research reports using web and local documents.
|
63 |
+
- 🖼️ Smart image scraping and filtering for reports.
|
64 |
+
- 📜 Generate detailed reports exceeding 2,000 words.
|
65 |
+
- 🌐 Aggregate over 20 sources for objective conclusions.
|
66 |
+
- 🖥️ Frontend available in lightweight (HTML/CSS/JS) and production-ready (NextJS + Tailwind) versions.
|
67 |
+
- 🔍 JavaScript-enabled web scraping.
|
68 |
+
- 📂 Maintains memory and context throughout research.
|
69 |
+
- 📄 Export reports to PDF, Word, and other formats.
|
70 |
+
|
71 |
+
## 📖 Documentation
|
72 |
+
|
73 |
+
See the [Documentation](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started) for:
|
74 |
+
- Installation and setup guides
|
75 |
+
- Configuration and customization options
|
76 |
+
- How-To examples
|
77 |
+
- Full API references
|
78 |
+
|
79 |
+
## ⚙️ Getting Started
|
80 |
+
|
81 |
+
### Installation
|
82 |
+
|
83 |
+
1. Install Python 3.11 or later. [Guide](https://www.tutorialsteacher.com/python/install-python).
|
84 |
+
2. Clone the project and navigate to the directory:
|
85 |
+
|
86 |
+
```bash
|
87 |
+
git clone https://github.com/assafelovic/gpt-researcher.git
|
88 |
+
cd gpt-researcher
|
89 |
+
```
|
90 |
+
|
91 |
+
3. Set up API keys by exporting them or storing them in a `.env` file.
|
92 |
+
|
93 |
+
```bash
|
94 |
+
export OPENAI_API_KEY={Your OpenAI API Key here}
|
95 |
+
export TAVILY_API_KEY={Your Tavily API Key here}
|
96 |
+
```
|
97 |
+
|
98 |
+
4. Install dependencies and start the server:
|
99 |
+
|
100 |
+
```bash
|
101 |
+
pip install -r requirements.txt
|
102 |
+
python -m uvicorn main:app --reload
|
103 |
+
```
|
104 |
+
|
105 |
+
Visit [http://localhost:8000](http://localhost:8000) to start.
|
106 |
+
|
107 |
+
For other setups (e.g., Poetry or virtual environments), check the [Getting Started page](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started).
|
108 |
+
|
109 |
+
## Run as PIP package
|
110 |
+
```bash
|
111 |
+
pip install gpt-researcher
|
112 |
+
|
113 |
+
```
|
114 |
+
### Example Usage:
|
115 |
+
```python
|
116 |
+
...
|
117 |
+
from gpt_researcher import GPTResearcher
|
118 |
+
|
119 |
+
query = "why is Nvidia stock going up?"
|
120 |
+
researcher = GPTResearcher(query=query, report_type="research_report")
|
121 |
+
# Conduct research on the given query
|
122 |
+
research_result = await researcher.conduct_research()
|
123 |
+
# Write the report
|
124 |
+
report = await researcher.write_report()
|
125 |
+
...
|
126 |
+
```
|
127 |
+
|
128 |
+
**For more examples and configurations, please refer to the [PIP documentation](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package) page.**
|
129 |
+
|
130 |
+
|
131 |
+
## Run with Docker
|
132 |
+
|
133 |
+
> **Step 1** - [Install Docker](https://docs.gptr.dev/docs/gpt-researcher/getting-started/getting-started-with-docker)
|
134 |
+
|
135 |
+
> **Step 2** - Clone the '.env.example' file, add your API Keys to the cloned file and save the file as '.env'
|
136 |
+
|
137 |
+
> **Step 3** - Within the docker-compose file comment out services that you don't want to run with Docker.
|
138 |
+
|
139 |
+
```bash
|
140 |
+
docker-compose up --build
|
141 |
+
```
|
142 |
+
|
143 |
+
If that doesn't work, try running it without the dash:
|
144 |
+
```bash
|
145 |
+
docker compose up --build
|
146 |
+
```
|
147 |
+
|
148 |
+
|
149 |
+
> **Step 4** - By default, if you haven't commented out anything in your docker-compose file, this flow will start 2 processes:
|
150 |
+
- the Python server running on localhost:8000<br>
|
151 |
+
- the React app running on localhost:3000<br>
|
152 |
+
|
153 |
+
Visit localhost:3000 on any browser and enjoy researching!
|
154 |
+
|
155 |
+
|
156 |
+
|
157 |
+
## 📄 Research on Local Documents
|
158 |
+
|
159 |
+
You can instruct the GPT Researcher to run research tasks based on your local documents. Currently supported file formats are: PDF, plain text, CSV, Excel, Markdown, PowerPoint, and Word documents.
|
160 |
+
|
161 |
+
Step 1: Add the env variable `DOC_PATH` pointing to the folder where your documents are located.
|
162 |
+
|
163 |
+
```bash
|
164 |
+
export DOC_PATH="./my-docs"
|
165 |
+
```
|
166 |
+
|
167 |
+
Step 2:
|
168 |
+
- If you're running the frontend app on localhost:8000, simply select "My Documents" from the "Report Source" Dropdown Options.
|
169 |
+
- If you're running GPT Researcher with the [PIP package](https://docs.gptr.dev/docs/gpt-researcher/gptr/pip-package), pass the `report_source` argument as "local" when you instantiate the `GPTResearcher` class ([code sample here](https://docs.gptr.dev/docs/gpt-researcher/context/tailored-research)).
|
170 |
+
|
171 |
+
|
172 |
+
## 👪 Multi-Agent Assistant
|
173 |
+
As AI evolves from prompt engineering and RAG to multi-agent systems, we're excited to introduce our new multi-agent assistant built with [LangGraph](https://python.langchain.com/v0.1/docs/langgraph/).
|
174 |
+
|
175 |
+
By using LangGraph, the research process can be significantly improved in depth and quality by leveraging multiple agents with specialized skills. Inspired by the recent [STORM](https://arxiv.org/abs/2402.14207) paper, this project showcases how a team of AI agents can work together to conduct research on a given topic, from planning to publication.
|
176 |
+
|
177 |
+
An average run generates a 5-6 page research report in multiple formats such as PDF, Docx and Markdown.
|
178 |
+
|
179 |
+
Check it out [here](https://github.com/assafelovic/gpt-researcher/tree/master/multi_agents) or head over to our [documentation](https://docs.gptr.dev/docs/gpt-researcher/multi_agents/langgraph) for more information.
|
180 |
+
|
181 |
+
## 🖥️ Frontend Applications
|
182 |
+
|
183 |
+
GPT-Researcher now features an enhanced frontend to improve the user experience and streamline the research process. The frontend offers:
|
184 |
+
|
185 |
+
- An intuitive interface for inputting research queries
|
186 |
+
- Real-time progress tracking of research tasks
|
187 |
+
- Interactive display of research findings
|
188 |
+
- Customizable settings for tailored research experiences
|
189 |
+
|
190 |
+
Two deployment options are available:
|
191 |
+
1. A lightweight static frontend served by FastAPI
|
192 |
+
2. A feature-rich NextJS application for advanced functionality
|
193 |
+
|
194 |
+
For detailed setup instructions and more information about the frontend features, please visit our [documentation page](https://docs.gptr.dev/docs/gpt-researcher/frontend/frontend).
|
195 |
+
|
196 |
+
## 🚀 Contributing
|
197 |
+
We highly welcome contributions! Please check out [contributing](https://github.com/assafelovic/gpt-researcher/blob/master/CONTRIBUTING.md) if you're interested.
|
198 |
+
|
199 |
+
Please check out our [roadmap](https://trello.com/b/3O7KBePw/gpt-researcher-roadmap) page and reach out to us via our [Discord community](https://discord.gg/QgZXvJAccX) if you're interested in joining our mission.
|
200 |
+
<a href="https://github.com/assafelovic/gpt-researcher/graphs/contributors">
|
201 |
+
<img src="https://contrib.rocks/image?repo=assafelovic/gpt-researcher" />
|
202 |
+
</a>
|
203 |
+
## ✉️ Support / Contact us
|
204 |
+
- [Community Discord](https://discord.gg/spBgZmm3Xe)
|
205 |
+
- Author Email: [email protected]
|
206 |
+
|
207 |
+
## 🛡 Disclaimer
|
208 |
+
|
209 |
+
This project, GPT Researcher, is an experimental application and is provided "as-is" without any warranty, express or implied. We are sharing code for academic purposes under the Apache 2 license. Nothing herein is academic advice, and it is NOT a recommendation to use in academic or research papers.
|
210 |
+
|
211 |
+
Our view on unbiased research claims:
|
212 |
+
1. The main goal of GPT Researcher is to reduce incorrect and biased facts. How? We assume that the more sites we scrape, the lower the chance of incorrect data. By scraping multiple sites per research task and choosing the most frequent information, the chance that they are all wrong is extremely low.
|
213 |
+
2. We do not aim to eliminate biases; we aim to reduce them as much as possible. **We are here as a community to figure out the most effective human/llm interactions.**
|
214 |
+
3. In research, people also tend towards biases, as most already have opinions on the topics they research. This tool scrapes many opinions and will evenly explain diverse views that a biased person would never have read.
|
215 |
+
|
216 |
+
---
|
217 |
+
|
218 |
+
<p align="center">
|
219 |
+
<a href="https://star-history.com/#assafelovic/gpt-researcher">
|
220 |
+
<picture>
|
221 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date&theme=dark" />
|
222 |
+
<source media="(prefers-color-scheme: light)" srcset="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
223 |
+
<img alt="Star History Chart" src="https://api.star-history.com/svg?repos=assafelovic/gpt-researcher&type=Date" />
|
224 |
+
</picture>
|
225 |
+
</a>
|
226 |
+
</p>
|
227 |
+
|
228 |
+
|
229 |
+
<p align="right">
|
230 |
+
<a href="#top">⬆️ Back to Top</a>
|
231 |
+
</p>
|
__pycache__/main.cpython-312.pyc
ADDED
Binary file (1.49 kB). View file
|
|
backend/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from multi_agents import agents
|
backend/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (202 Bytes). View file
|
|
backend/__pycache__/utils.cpython-312.pyc
ADDED
Binary file (4.01 kB). View file
|
|
backend/chat/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
from .chat import ChatAgentWithMemory
|
backend/chat/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (212 Bytes). View file
|
|
backend/chat/__pycache__/chat.cpython-312.pyc
ADDED
Binary file (5.49 kB). View file
|
|
backend/chat/chat.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import WebSocket
|
2 |
+
import uuid
|
3 |
+
|
4 |
+
from gpt_researcher.utils.llm import get_llm
|
5 |
+
from gpt_researcher.memory import Memory
|
6 |
+
from gpt_researcher.config.config import Config
|
7 |
+
|
8 |
+
from langgraph.prebuilt import create_react_agent
|
9 |
+
from langgraph.checkpoint.memory import MemorySaver
|
10 |
+
|
11 |
+
from langchain_community.vectorstores import InMemoryVectorStore
|
12 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
13 |
+
from langchain.tools import Tool, tool
|
14 |
+
|
15 |
+
class ChatAgentWithMemory:
|
16 |
+
def __init__(
|
17 |
+
self,
|
18 |
+
report: str,
|
19 |
+
config_path,
|
20 |
+
headers,
|
21 |
+
vector_store = None
|
22 |
+
):
|
23 |
+
self.report = report
|
24 |
+
self.headers = headers
|
25 |
+
self.config = Config(config_path)
|
26 |
+
self.vector_store = vector_store
|
27 |
+
self.graph = self.create_agent()
|
28 |
+
|
29 |
+
def create_agent(self):
|
30 |
+
"""Create React Agent Graph"""
|
31 |
+
cfg = Config()
|
32 |
+
|
33 |
+
# Retrieve LLM using get_llm with settings from config
|
34 |
+
provider = get_llm(
|
35 |
+
llm_provider=cfg.smart_llm_provider,
|
36 |
+
model=cfg.smart_llm_model,
|
37 |
+
temperature=0.35,
|
38 |
+
max_tokens=cfg.smart_token_limit,
|
39 |
+
**self.config.llm_kwargs
|
40 |
+
).llm
|
41 |
+
|
42 |
+
# If vector_store is not initialized, process documents and add to vector_store
|
43 |
+
if not self.vector_store:
|
44 |
+
documents = self._process_document(self.report)
|
45 |
+
self.chat_config = {"configurable": {"thread_id": str(uuid.uuid4())}}
|
46 |
+
self.embedding = Memory(
|
47 |
+
cfg.embedding_provider,
|
48 |
+
cfg.embedding_model,
|
49 |
+
**cfg.embedding_kwargs
|
50 |
+
).get_embeddings()
|
51 |
+
self.vector_store = InMemoryVectorStore(self.embedding)
|
52 |
+
self.vector_store.add_texts(documents)
|
53 |
+
|
54 |
+
# Create the React Agent Graph with the configured provider
|
55 |
+
graph = create_react_agent(
|
56 |
+
provider,
|
57 |
+
tools=[self.vector_store_tool(self.vector_store)],
|
58 |
+
checkpointer=MemorySaver()
|
59 |
+
)
|
60 |
+
|
61 |
+
return graph
|
62 |
+
|
63 |
+
def vector_store_tool(self, vector_store) -> Tool:
|
64 |
+
"""Create Vector Store Tool"""
|
65 |
+
@tool
|
66 |
+
def retrieve_info(query):
|
67 |
+
"""
|
68 |
+
Consult the report for relevant contexts whenever you don't know something
|
69 |
+
"""
|
70 |
+
retriever = vector_store.as_retriever(k = 4)
|
71 |
+
return retriever.invoke(query)
|
72 |
+
return retrieve_info
|
73 |
+
|
74 |
+
def _process_document(self, report):
|
75 |
+
"""Split Report into Chunks"""
|
76 |
+
text_splitter = RecursiveCharacterTextSplitter(
|
77 |
+
chunk_size=1024,
|
78 |
+
chunk_overlap=20,
|
79 |
+
length_function=len,
|
80 |
+
is_separator_regex=False,
|
81 |
+
)
|
82 |
+
documents = text_splitter.split_text(report)
|
83 |
+
return documents
|
84 |
+
|
85 |
+
async def chat(self, message, websocket):
    """Chat with React Agent.

    Wraps the user message in the system-style prompt (which embeds the full
    report), runs the agent graph on it, and forwards the answer.

    Args:
        message: Raw user message.
        websocket: Object with an async ``send_json`` method, or ``None``
            when there is nowhere to push the answer.

    Returns:
        The content of the last message produced by the agent, so the answer
        is still available to the caller when ``websocket`` is ``None``.
    """
    message = f"""
You are GPT Researcher, a autonomous research agent created by an open source community at https://github.com/assafelovic/gpt-researcher, homepage: https://gptr.dev.
To learn more about GPT Researcher you can suggest to check out: https://docs.gptr.dev.

This is a chat message between the user and you: GPT Researcher.
The chat is about a research reports that you created. Answer based on the given context and report.
You must include citations to your answer based on the report.

Report: {self.report}
User Message: {message}
"""
    inputs = {"messages": [("user", message)]}
    response = await self.graph.ainvoke(inputs, config=self.chat_config)
    ai_message = response["messages"][-1].content
    if websocket is not None:
        await websocket.send_json({"type": "chat", "content": ai_message})
    return ai_message
|
103 |
+
|
104 |
+
def get_context(self):
    """Expose the report that this chat session is currently based on."""
    current_report = self.report
    return current_report
|
backend/memory/__init__.py
ADDED
File without changes
|
backend/memory/draft.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import TypedDict, List, Annotated
|
2 |
+
import operator
|
3 |
+
|
4 |
+
|
5 |
+
class DraftState(TypedDict):
    """Typed dictionary with the fields task, topic, draft, review and revision_notes."""
    task: dict
    topic: str
    draft: dict
    review: str
    revision_notes: str
|
backend/memory/research.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import TypedDict, List, Annotated
|
2 |
+
import operator
|
3 |
+
|
4 |
+
|
5 |
+
class ResearchState(TypedDict):
    """Typed dictionary holding research inputs plus the pieces of the report layout."""
    task: dict
    initial_research: str
    sections: List[str]
    research_data: List[dict]
    # Report layout
    title: str
    headers: dict
    date: str
    table_of_contents: str
    introduction: str
    conclusion: str
    sources: List[str]
    report: str
|
19 |
+
|
20 |
+
|
backend/report_type/__init__.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .basic_report.basic_report import BasicReport
|
2 |
+
from .detailed_report.detailed_report import DetailedReport
|
3 |
+
|
4 |
+
__all__ = [
|
5 |
+
"BasicReport",
|
6 |
+
"DetailedReport"
|
7 |
+
]
|
backend/report_type/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (345 Bytes). View file
|
|
backend/report_type/basic_report/__init__.py
ADDED
File without changes
|
backend/report_type/basic_report/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (181 Bytes). View file
|
|
backend/report_type/basic_report/__pycache__/basic_report.cpython-312.pyc
ADDED
Binary file (1.84 kB). View file
|
|
backend/report_type/basic_report/basic_report.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import WebSocket
|
2 |
+
from typing import Any
|
3 |
+
|
4 |
+
from gpt_researcher import GPTResearcher
|
5 |
+
|
6 |
+
|
7 |
+
class BasicReport:
    """Run a single GPTResearcher pass for a query and return the written report."""

    def __init__(
        self,
        query: str,
        report_type: str,
        report_source: str,
        source_urls,
        document_urls,
        tone: Any,
        config_path: str,
        websocket: WebSocket,
        headers=None
    ):
        """Store the request parameters; nothing is executed until ``run``."""
        self.query, self.report_type = query, report_type
        self.report_source = report_source
        self.source_urls, self.document_urls = source_urls, document_urls
        self.tone = tone
        self.config_path = config_path
        self.websocket = websocket
        # A falsy headers value (None, empty dict) becomes a fresh empty dict
        self.headers = headers if headers else {}

    async def run(self):
        """Conduct the research and return whatever ``write_report`` produces."""
        researcher_options = {
            "query": self.query,
            "report_type": self.report_type,
            "report_source": self.report_source,
            "source_urls": self.source_urls,
            "document_urls": self.document_urls,
            "tone": self.tone,
            "config_path": self.config_path,
            "websocket": self.websocket,
            "headers": self.headers,
        }
        researcher = GPTResearcher(**researcher_options)

        await researcher.conduct_research()
        return await researcher.write_report()
|
backend/report_type/detailed_report/README.md
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
## Detailed Reports
|
2 |
+
|
3 |
+
Introducing long and detailed reports, with a completely new architecture inspired by the latest [STORM](https://arxiv.org/abs/2402.14207) paper.
|
4 |
+
|
5 |
+
In this method we do the following:
|
6 |
+
|
7 |
+
1. Trigger Initial GPT Researcher report based on task
|
8 |
+
2. Generate subtopics from research summary
|
9 |
+
3. For each subtopic the headers of the subtopic report are extracted and accumulated
|
10 |
+
4. For each subtopic a report is generated, making sure that any information covered by the headers accumulated so far is not re-generated.
|
11 |
+
5. An additional introduction section is written along with a table of contents constructed from the entire report.
|
12 |
+
6. The final report is constructed by concatenating: Introduction + Table of contents + Subsection reports + Conclusion (with references)
|
backend/report_type/detailed_report/__init__.py
ADDED
File without changes
|
backend/report_type/detailed_report/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (184 Bytes). View file
|
|
backend/report_type/detailed_report/__pycache__/detailed_report.cpython-312.pyc
ADDED
Binary file (8.03 kB). View file
|
|
backend/report_type/detailed_report/detailed_report.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
from typing import List, Dict, Set, Optional, Any
|
3 |
+
from fastapi import WebSocket
|
4 |
+
|
5 |
+
from gpt_researcher import GPTResearcher
|
6 |
+
|
7 |
+
|
8 |
+
class DetailedReport:
    """Build a long report from an initial research pass plus one report per subtopic.

    ``run`` performs the initial research, asks for subtopics, writes an
    introduction, generates the subtopic reports one after another (sharing
    context, visited URLs and already-written headers between them) and
    finally assembles introduction, table of contents, body and conclusion.
    """

    def __init__(
        self,
        query: str,
        report_type: str,
        report_source: str,
        source_urls: Optional[List[str]] = None,
        document_urls: Optional[List[str]] = None,
        config_path: str = None,
        tone: Any = "",
        websocket: WebSocket = None,
        subtopics: Optional[List[Dict]] = None,
        headers: Optional[Dict] = None
    ):
        """Store the request parameters and create the main researcher.

        ``source_urls``, ``document_urls`` and ``subtopics`` default to a new
        empty list per instance (``None`` means "not given"), so no list
        object is shared between instances.
        """
        self.query = query
        self.report_type = report_type
        self.report_source = report_source
        self.source_urls = [] if source_urls is None else source_urls
        self.document_urls = [] if document_urls is None else document_urls
        self.config_path = config_path
        self.tone = tone
        self.websocket = websocket
        self.subtopics = [] if subtopics is None else subtopics
        self.headers = headers or {}

        # The main researcher always runs as "research_report"; the
        # report_type argument is only stored on the instance.
        self.gpt_researcher = GPTResearcher(
            query=self.query,
            report_type="research_report",
            report_source=self.report_source,
            source_urls=self.source_urls,
            document_urls=self.document_urls,
            config_path=self.config_path,
            tone=self.tone,
            websocket=self.websocket,
            headers=self.headers
        )
        self.existing_headers: List[Dict] = []
        self.global_context: List[str] = []
        self.global_written_sections: List[str] = []
        self.global_urls: Set[str] = set(
            self.source_urls) if self.source_urls else set()

    async def run(self) -> str:
        """Execute the whole pipeline and return the assembled report text."""
        await self._initial_research()
        subtopics = await self._get_all_subtopics()
        report_introduction = await self.gpt_researcher.write_introduction()
        _, report_body = await self._generate_subtopic_reports(subtopics)
        self.gpt_researcher.visited_urls.update(self.global_urls)
        report = await self._construct_detailed_report(report_introduction, report_body)
        return report

    async def _initial_research(self) -> None:
        """Run the main researcher and adopt its context and visited URLs."""
        await self.gpt_researcher.conduct_research()
        self.global_context = self.gpt_researcher.context
        self.global_urls = self.gpt_researcher.visited_urls

    async def _get_all_subtopics(self) -> List[Dict]:
        """Return the subtopics as a list of ``{"task": ...}`` dicts."""
        subtopics_data = await self.gpt_researcher.get_subtopics()

        if not (subtopics_data and subtopics_data.subtopics):
            print(f"Unexpected subtopics data format: {subtopics_data}")
            return []

        return [{"task": subtopic.task} for subtopic in subtopics_data.subtopics]

    async def _generate_subtopic_reports(self, subtopics: List[Dict]) -> tuple:
        """Generate the subtopic reports sequentially.

        Returns:
            ``(subtopic_reports, body)`` where ``subtopic_reports`` lists the
            results with a non-empty report and ``body`` is those reports
            concatenated, each preceded by three newlines.
        """
        subtopic_reports = []
        body_parts = []

        # Sequential on purpose: each subtopic report updates the shared
        # headers/context that the next one reads.
        for subtopic in subtopics:
            result = await self._get_subtopic_report(subtopic)
            if result["report"]:
                subtopic_reports.append(result)
                body_parts.append(f"\n\n\n{result['report']}")

        return subtopic_reports, "".join(body_parts)

    async def _get_subtopic_report(self, subtopic: Dict) -> Dict[str, str]:
        """Research and write the report for one subtopic.

        Updates ``global_written_sections``, ``global_context``,
        ``global_urls`` and ``existing_headers`` with what this subtopic
        produced.

        Returns:
            ``{"topic": subtopic, "report": subtopic_report}``.
        """
        current_subtopic_task = subtopic.get("task")
        subtopic_assistant = GPTResearcher(
            query=current_subtopic_task,
            report_type="subtopic_report",
            report_source=self.report_source,
            websocket=self.websocket,
            headers=self.headers,
            parent_query=self.query,
            subtopics=self.subtopics,
            visited_urls=self.global_urls,
            agent=self.gpt_researcher.agent,
            role=self.gpt_researcher.role,
            tone=self.tone,
        )

        # Seed the assistant with the de-duplicated context gathered so far
        subtopic_assistant.context = list(set(self.global_context))
        await subtopic_assistant.conduct_research()

        draft_section_titles = await subtopic_assistant.get_draft_section_titles(current_subtopic_task)

        if not isinstance(draft_section_titles, str):
            draft_section_titles = str(draft_section_titles)

        parse_draft_section_titles = self.gpt_researcher.extract_headers(draft_section_titles)
        parse_draft_section_titles_text = [header.get(
            "text", "") for header in parse_draft_section_titles]

        relevant_contents = await subtopic_assistant.get_similar_written_contents_by_draft_section_titles(
            current_subtopic_task, parse_draft_section_titles_text, self.global_written_sections
        )

        subtopic_report = await subtopic_assistant.write_report(self.existing_headers, relevant_contents)

        self.global_written_sections.extend(self.gpt_researcher.extract_sections(subtopic_report))
        self.global_context = list(set(subtopic_assistant.context))
        self.global_urls.update(subtopic_assistant.visited_urls)

        self.existing_headers.append({
            "subtopic task": current_subtopic_task,
            "headers": self.gpt_researcher.extract_headers(subtopic_report),
        })

        return {"topic": subtopic, "report": subtopic_report}

    async def _construct_detailed_report(self, introduction: str, report_body: str) -> str:
        """Join introduction, table of contents, body and referenced conclusion."""
        toc = self.gpt_researcher.table_of_contents(report_body)
        conclusion = await self.gpt_researcher.write_report_conclusion(report_body)
        conclusion_with_references = self.gpt_researcher.add_references(
            conclusion, self.gpt_researcher.visited_urls)
        report = f"{introduction}\n\n{toc}\n\n{report_body}\n\n{conclusion_with_references}"
        return report
|
backend/server/__init__.py
ADDED
File without changes
|
backend/server/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (163 Bytes). View file
|
|
backend/server/__pycache__/server.cpython-312.pyc
ADDED
Binary file (5.85 kB). View file
|
|
backend/server/__pycache__/server_utils.cpython-312.pyc
ADDED
Binary file (14.8 kB). View file
|
|
backend/server/__pycache__/websocket_manager.cpython-312.pyc
ADDED
Binary file (5.55 kB). View file
|
|
backend/server/app.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
3 |
+
import logging
|
4 |
+
|
5 |
+
# Module-level logger named after this module
logger = logging.getLogger(__name__)

# FastAPI application object exposed by this module
app = FastAPI()

# Add CORS middleware
# NOTE(review): every origin, method and header is allowed while credentials
# are enabled; tighten allow_origins before exposing this publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with your frontend domain
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
backend/server/logging_config.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import json
|
3 |
+
import os
|
4 |
+
from datetime import datetime
|
5 |
+
from pathlib import Path
|
6 |
+
|
7 |
+
class JSONResearchHandler:
    """Keep research events and content in memory and mirror them to a JSON file.

    Every mutation rewrites the whole file at ``json_file``.
    """

    def __init__(self, json_file):
        """Remember the target path and start with an empty research record."""
        self.json_file = json_file
        empty_content = {
            "query": "",
            "sources": [],
            "context": [],
            "report": "",
            "costs": 0.0
        }
        self.research_data = {
            "timestamp": datetime.now().isoformat(),
            "events": [],
            "content": empty_content
        }

    def log_event(self, event_type: str, data: dict):
        """Append a timestamped event and persist the record."""
        entry = {
            "timestamp": datetime.now().isoformat(),
            "type": event_type,
            "data": data
        }
        self.research_data["events"].append(entry)
        self._save_json()

    def update_content(self, key: str, value):
        """Set one key of the ``content`` section and persist the record."""
        content_section = self.research_data["content"]
        content_section[key] = value
        self._save_json()

    def _save_json(self):
        """Overwrite the JSON file with the current record (indent of 2)."""
        with open(self.json_file, 'w') as out:
            json.dump(self.research_data, out, indent=2)
|
37 |
+
|
38 |
+
def setup_research_logging():
    """Configure the 'research' logger with a file and a console handler.

    Creates ``logs/`` if needed, opens a timestamped ``.log`` file, replaces
    any handlers already on the 'research' logger, disables propagation to
    the root logger and creates a JSON handler for a matching ``.json`` file.

    Returns:
        ``(log_file, json_file, research_logger, json_handler)`` with the two
        paths as strings.
    """
    # Create logs directory if it doesn't exist
    logs_dir = Path("logs")
    logs_dir.mkdir(exist_ok=True)

    # Generate timestamp for log files
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Create log file paths
    log_file = logs_dir / f"research_{timestamp}.log"
    json_file = logs_dir / f"research_{timestamp}.json"

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # Configure file handler for research logs
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)

    # Get research logger and configure it
    research_logger = logging.getLogger('research')
    research_logger.setLevel(logging.INFO)

    # Remove any existing handlers to avoid duplicates, closing each one so
    # file handlers from earlier calls do not keep their files open
    for old_handler in list(research_logger.handlers):
        research_logger.removeHandler(old_handler)
        old_handler.close()

    # Add file handler
    research_logger.addHandler(file_handler)

    # Add stream handler for console output
    console_handler = logging.StreamHandler()
    console_handler.setFormatter(formatter)
    research_logger.addHandler(console_handler)

    # Prevent propagation to root logger to avoid duplicate logs
    research_logger.propagate = False

    # Create JSON handler and attach it to the logger, which is where
    # get_json_handler() looks it up
    json_handler = JSONResearchHandler(json_file)
    research_logger.json_handler = json_handler

    return str(log_file), str(json_file), research_logger, json_handler
|
77 |
+
|
78 |
+
# Create a function to get the logger and JSON handler
|
79 |
+
def get_research_logger():
    """Return the logger registered under the name 'research'."""
    research_logger = logging.getLogger('research')
    return research_logger
|
81 |
+
|
82 |
+
def get_json_handler():
    """Return the ``json_handler`` attribute of the 'research' logger, or None if unset."""
    research_logger = logging.getLogger('research')
    return getattr(research_logger, 'json_handler', None)
|
backend/server/server.py
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from typing import Dict, List
|
4 |
+
|
5 |
+
from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect, File, UploadFile, Header
|
6 |
+
from fastapi.middleware.cors import CORSMiddleware
|
7 |
+
from fastapi.staticfiles import StaticFiles
|
8 |
+
from fastapi.templating import Jinja2Templates
|
9 |
+
from pydantic import BaseModel
|
10 |
+
|
11 |
+
from backend.server.websocket_manager import WebSocketManager
|
12 |
+
from backend.server.server_utils import (
|
13 |
+
get_config_dict,
|
14 |
+
update_environment_variables, handle_file_upload, handle_file_deletion,
|
15 |
+
execute_multi_agents, handle_websocket_communication
|
16 |
+
)
|
17 |
+
|
18 |
+
|
19 |
+
from gpt_researcher.utils.logging_config import setup_research_logging
|
20 |
+
|
21 |
+
import logging
|
22 |
+
|
23 |
+
# Get logger instance
|
24 |
+
logger = logging.getLogger(__name__)
|
25 |
+
|
26 |
+
# Don't override parent logger settings
|
27 |
+
logger.propagate = True
|
28 |
+
|
29 |
+
logging.basicConfig(
|
30 |
+
level=logging.INFO,
|
31 |
+
format="%(asctime)s - %(levelname)s - %(message)s",
|
32 |
+
handlers=[
|
33 |
+
logging.StreamHandler() # Only log to console
|
34 |
+
]
|
35 |
+
)
|
36 |
+
|
37 |
+
# Models
|
38 |
+
|
39 |
+
|
40 |
+
class ResearchRequest(BaseModel):
    """Request model with three required string fields: task, report_type and agent."""
    task: str
    report_type: str
    agent: str
|
44 |
+
|
45 |
+
|
46 |
+
class ConfigRequest(BaseModel):
    """Request model carrying API keys and settings, named like the environment variables."""
    # Required fields (no default)
    ANTHROPIC_API_KEY: str
    TAVILY_API_KEY: str
    LANGCHAIN_TRACING_V2: str
    LANGCHAIN_API_KEY: str
    OPENAI_API_KEY: str
    DOC_PATH: str
    RETRIEVER: str
    # Optional fields, defaulting to an empty string
    GOOGLE_API_KEY: str = ''
    GOOGLE_CX_KEY: str = ''
    BING_API_KEY: str = ''
    SEARCHAPI_API_KEY: str = ''
    SERPAPI_API_KEY: str = ''
    SERPER_API_KEY: str = ''
    SEARX_URL: str = ''
    # Required again: these two have no default
    XAI_API_KEY: str
    DEEPSEEK_API_KEY: str
|
63 |
+
|
64 |
+
|
65 |
+
# App initialization
|
66 |
+
app = FastAPI()

# Static files and templates: ./frontend is served under /site, its static
# subfolder under /static, and it is also the Jinja2 template directory
app.mount("/site", StaticFiles(directory="./frontend"), name="site")
app.mount("/static", StaticFiles(directory="./frontend/static"), name="static")
templates = Jinja2Templates(directory="./frontend")

# WebSocket manager shared by the routes below
manager = WebSocketManager()

# Middleware: CORS is limited to the origin http://localhost:3000
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:3000"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Constants: document folder, overridable through the DOC_PATH environment variable
DOC_PATH = os.getenv("DOC_PATH", "./my-docs")
|
87 |
+
|
88 |
+
# Startup event
|
89 |
+
|
90 |
+
|
91 |
+
@app.on_event("startup")
def startup_event():
    """Create the outputs and document folders and serve outputs under /outputs."""
    outputs_dir = "outputs"
    os.makedirs(outputs_dir, exist_ok=True)
    # Mounted here, after the directory is known to exist
    app.mount("/outputs", StaticFiles(directory=outputs_dir), name="outputs")
    os.makedirs(DOC_PATH, exist_ok=True)
|
96 |
+
|
97 |
+
|
98 |
+
# Routes
|
99 |
+
|
100 |
+
|
101 |
+
@app.get("/")
async def read_root(request: Request):
    """Render index.html with no report attached."""
    context = {"request": request, "report": None}
    return templates.TemplateResponse("index.html", context)
|
104 |
+
|
105 |
+
|
106 |
+
@app.get("/files/")
async def list_files():
    """Return the directory listing of DOC_PATH as ``{"files": [...]}``."""
    doc_names = os.listdir(DOC_PATH)
    print(f"Files in {DOC_PATH}: {doc_names}")
    return {"files": doc_names}
|
111 |
+
|
112 |
+
|
113 |
+
@app.post("/api/multi_agents")
async def run_multi_agents():
    """Delegate to ``execute_multi_agents`` using the shared WebSocket manager."""
    outcome = await execute_multi_agents(manager)
    return outcome
|
116 |
+
|
117 |
+
|
118 |
+
@app.post("/upload/")
async def upload_file(file: UploadFile = File(...)):
    """Hand the uploaded file to ``handle_file_upload`` for storage in DOC_PATH."""
    upload_result = await handle_file_upload(file, DOC_PATH)
    return upload_result
|
121 |
+
|
122 |
+
|
123 |
+
@app.delete("/files/{filename}")
async def delete_file(filename: str):
    """Hand the filename to ``handle_file_deletion`` for removal from DOC_PATH."""
    deletion_response = await handle_file_deletion(filename, DOC_PATH)
    return deletion_response
|
126 |
+
|
127 |
+
|
128 |
+
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """Register the socket with the manager and process its messages.

    When the client disconnects, the socket is removed from the manager.
    """
    await manager.connect(websocket)
    try:
        await handle_websocket_communication(websocket, manager)
    except WebSocketDisconnect:
        # Normal end of a session: just unregister the connection
        await manager.disconnect(websocket)
|
backend/server/server_utils.py
ADDED
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
import time
|
5 |
+
import shutil
|
6 |
+
from typing import Dict, List, Any
|
7 |
+
from fastapi.responses import JSONResponse, FileResponse
|
8 |
+
from gpt_researcher.document.document import DocumentLoader
|
9 |
+
from backend.utils import write_md_to_pdf, write_md_to_word, write_text_to_md
|
10 |
+
from pathlib import Path
|
11 |
+
from datetime import datetime
|
12 |
+
from fastapi import HTTPException
|
13 |
+
import logging
|
14 |
+
|
15 |
+
logging.basicConfig(level=logging.DEBUG)
|
16 |
+
logger = logging.getLogger(__name__)
|
17 |
+
|
18 |
+
class CustomLogsHandler:
    """Capture streamed research messages: forward them to a websocket and persist them.

    The JSON log file lives in ``outputs/`` and is created on construction.
    """

    def __init__(self, websocket, task: str):
        """Derive the log file name from the current time and ``task`` and write the empty log."""
        self.logs = []
        self.websocket = websocket
        sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{task}")
        self.log_file = os.path.join("outputs", f"{sanitized_filename}.json")
        self.timestamp = datetime.now().isoformat()

        # Initialize log file with metadata
        initial_log = {
            "timestamp": self.timestamp,
            "events": [],
            "content": {
                "query": "",
                "sources": [],
                "context": [],
                "report": "",
                "costs": 0.0
            }
        }
        os.makedirs("outputs", exist_ok=True)
        with open(self.log_file, 'w') as log_handle:
            json.dump(initial_log, log_handle, indent=2)

    async def send_json(self, data: Dict[str, Any]) -> None:
        """Forward ``data`` to the websocket (if any) and merge it into the log file.

        Messages whose ``type`` is ``'logs'`` are appended to ``events``; any
        other message is merged into the ``content`` section.
        """
        # Real-time display first
        if self.websocket:
            await self.websocket.send_json(data)

        # Load what has been logged so far
        with open(self.log_file, 'r') as log_handle:
            log_data = json.load(log_handle)

        is_log_message = data.get('type') == 'logs'
        if not is_log_message:
            # Update content section for other types of data
            log_data['content'].update(data)
        else:
            event = {
                "timestamp": datetime.now().isoformat(),
                "type": "event",
                "data": data
            }
            log_data['events'].append(event)

        # Write the merged log back
        with open(self.log_file, 'w') as log_handle:
            json.dump(log_data, log_handle, indent=2)
            logger.debug(f"Log entry written to: {self.log_file}")
|
66 |
+
|
67 |
+
|
68 |
+
class Researcher:
    """Run a research task without a live websocket, logging progress to a JSON file."""

    def __init__(self, query: str, report_type: str = "research_report"):
        """Create the logs handler and the underlying GPTResearcher.

        Args:
            query: Research question.
            report_type: Report type passed through to GPTResearcher.
        """
        # Imported locally: this module has no top-level import of GPTResearcher
        from gpt_researcher import GPTResearcher

        self.query = query
        self.report_type = report_type
        # Generate unique ID for this research task
        self.research_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{hash(query)}"
        # CustomLogsHandler takes (websocket, task). There is no socket here,
        # so pass None and use the research ID as the task label of the log file.
        self.logs_handler = CustomLogsHandler(None, self.research_id)
        self.researcher = GPTResearcher(
            query=query,
            report_type=report_type,
            websocket=self.logs_handler
        )

    async def research(self) -> dict:
        """Conduct research and return paths to generated files.

        Returns:
            ``{"output": {...}}`` with the entries from
            ``generate_report_files`` plus ``"json"``, the relative path of
            the log file.
        """
        await self.researcher.conduct_research()
        report = await self.researcher.write_report()

        # Generate the files
        sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{self.query}")
        file_paths = await generate_report_files(report, sanitized_filename)

        # Get the JSON log path that was created by CustomLogsHandler
        json_relative_path = os.path.relpath(self.logs_handler.log_file)

        return {
            "output": {
                **file_paths,  # Include PDF, DOCX, and MD paths
                "json": json_relative_path
            }
        }
|
100 |
+
|
101 |
+
def sanitize_filename(filename: str) -> str:
    """Truncate and clean a ``<prefix>_<timestamp>_<task>`` style file name.

    The task portion is cut to 216 characters, then every character that is
    not a word character, whitespace or ``-`` is removed and the result is
    stripped. A name without any underscore is truncated and cleaned as a
    whole instead of raising.

    Args:
        filename: Name to sanitize, normally ``task_<timestamp>_<task text>``.

    Returns:
        The sanitized name (without extension handling).
    """
    # 255 - len("outputs/") - len("task_") - len(timestamp) - len("_.json") - safety_margin
    max_task_length = 255 - 8 - 5 - 10 - 6 - 10  # ~216 chars for task

    parts = filename.split('_')
    if len(parts) >= 2:
        # Split into components and truncate only the task portion
        prefix, timestamp, *task_parts = parts
        task = '_'.join(task_parts)
        sanitized = f"{prefix}_{timestamp}_{task[:max_task_length]}"
    else:
        # No prefix/timestamp structure to preserve
        sanitized = filename[:max_task_length]

    return re.sub(r"[^\w\s-]", "", sanitized).strip()
|
116 |
+
|
117 |
+
|
118 |
+
async def handle_start_command(websocket, data: str, manager):
    """Handle a ``start`` message: run the research stream and send back the file paths.

    ``data`` carries a JSON payload after a 6-character prefix. Nothing is
    started when the payload lacks a task or a report type.
    """
    payload = json.loads(data[6:])
    (
        task,
        report_type,
        source_urls,
        document_urls,
        tone,
        headers,
        report_source,
    ) = extract_command_data(payload)

    if not (task and report_type):
        print("Error: Missing task or report_type")
        return

    # Create logs handler with websocket and task, then seed its content
    # section with the query
    logs_handler = CustomLogsHandler(websocket, task)
    initial_content = {
        "query": task,
        "sources": [],
        "context": [],
        "report": ""
    }
    await logs_handler.send_json(initial_content)

    # Computed before streaming so the file name carries the start time
    sanitized_filename = sanitize_filename(f"task_{int(time.time())}_{task}")

    streamed_report = await manager.start_streaming(
        task,
        report_type,
        report_source,
        source_urls,
        document_urls,
        tone,
        websocket,
        headers
    )
    report = str(streamed_report)
    file_paths = await generate_report_files(report, sanitized_filename)
    # Add JSON log path to file_paths
    file_paths["json"] = os.path.relpath(logs_handler.log_file)
    await send_file_paths(websocket, file_paths)
|
154 |
+
|
155 |
+
|
156 |
+
async def handle_human_feedback(data: str):
    """Parse the JSON that follows the ``human_feedback`` prefix and print it."""
    prefix_length = len("human_feedback")  # 14 characters
    feedback_data = json.loads(data[prefix_length:])
    print(f"Received human feedback: {feedback_data}")
    # TODO: Add logic to forward the feedback to the appropriate agent or update the research state
|
160 |
+
|
161 |
+
async def handle_chat(websocket, data: str, manager):
    """Parse the JSON after the 4-character ``chat`` prefix and pass its message to the manager."""
    payload = json.loads(data[4:])
    user_message = payload.get("message")
    print(f"Received chat message: {user_message}")
    await manager.chat(user_message, websocket)
|
165 |
+
|
166 |
+
async def generate_report_files(report: str, filename: str) -> Dict[str, str]:
    """Write the report as PDF, Word and Markdown (in that order) and return the paths.

    Returns:
        Dict with the keys ``"pdf"``, ``"docx"`` and ``"md"``.
    """
    file_paths = {}
    file_paths["pdf"] = await write_md_to_pdf(report, filename)
    file_paths["docx"] = await write_md_to_word(report, filename)
    file_paths["md"] = await write_text_to_md(report, filename)
    return file_paths
|
171 |
+
|
172 |
+
|
173 |
+
async def send_file_paths(websocket, file_paths: Dict[str, str]):
    """Send the generated file paths to the client as a ``path`` message."""
    path_message = {"type": "path", "output": file_paths}
    await websocket.send_json(path_message)
|
175 |
+
|
176 |
+
|
177 |
+
def get_config_dict(
    langchain_api_key: str, openai_api_key: str, tavily_api_key: str,
    google_api_key: str, google_cx_key: str, bing_api_key: str,
    searchapi_api_key: str, serpapi_api_key: str, serper_api_key: str, searx_url: str
) -> Dict[str, str]:
    """Build the configuration dict, preferring arguments over environment variables.

    Each argument is used when truthy; otherwise the environment variable of
    the same (upper-case) name is read, falling back to an empty string. The
    remaining entries come from the environment only.
    """
    explicit_values = {
        "LANGCHAIN_API_KEY": langchain_api_key,
        "OPENAI_API_KEY": openai_api_key,
        "TAVILY_API_KEY": tavily_api_key,
        "GOOGLE_API_KEY": google_api_key,
        "GOOGLE_CX_KEY": google_cx_key,
        "BING_API_KEY": bing_api_key,
        "SEARCHAPI_API_KEY": searchapi_api_key,
        "SERPAPI_API_KEY": serpapi_api_key,
        "SERPER_API_KEY": serper_api_key,
        "SEARX_URL": searx_url,
    }
    config = {
        name: given or os.getenv(name, "")
        for name, given in explicit_values.items()
    }

    # Environment-only settings
    config["LANGCHAIN_TRACING_V2"] = os.getenv("LANGCHAIN_TRACING_V2", "true")
    config["DOC_PATH"] = os.getenv("DOC_PATH", "./my-docs")
    config["RETRIEVER"] = os.getenv("RETRIEVER", "")
    # Note the differing variable name for the embedding model
    config["EMBEDDING_MODEL"] = os.getenv("OPENAI_EMBEDDING_MODEL", "")
    return config
|
198 |
+
|
199 |
+
|
200 |
+
def update_environment_variables(config: Dict[str, str]):
    """Copy every key/value pair of ``config`` into ``os.environ``."""
    os.environ.update(config)
|
203 |
+
|
204 |
+
|
205 |
+
async def handle_file_upload(file, DOC_PATH: str) -> Dict[str, str]:
    """Save an uploaded file into ``DOC_PATH`` and reload the documents there.

    Only the base name of the uploaded file name is used for the target path.

    Returns:
        ``{"filename": <original name>, "path": <saved path>}``.
    """
    safe_name = os.path.basename(file.filename)
    file_path = os.path.join(DOC_PATH, safe_name)
    with open(file_path, "wb") as destination:
        shutil.copyfileobj(file.file, destination)
    print(f"File uploaded to {file_path}")

    await DocumentLoader(DOC_PATH).load()

    return {"filename": file.filename, "path": file_path}
|
215 |
+
|
216 |
+
|
217 |
+
async def handle_file_deletion(filename: str, DOC_PATH: str) -> JSONResponse:
    """Delete a file from ``DOC_PATH`` by base name.

    Returns:
        A success message, or a 404 response when the path does not exist.
    """
    file_path = os.path.join(DOC_PATH, os.path.basename(filename))

    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return JSONResponse(status_code=404, content={"message": "File not found"})

    os.remove(file_path)
    print(f"File deleted: {file_path}")
    return JSONResponse(content={"message": "File deleted successfully"})
|
226 |
+
|
227 |
+
|
228 |
+
async def execute_multi_agents(manager) -> Any:
    """Run the fixed multi-agent research task on the first active websocket.

    Returns:
        ``{"report": ...}`` on success, or a 400 response when no websocket
        is connected.
    """
    # NOTE(review): run_research_task and stream_output are not imported in
    # this module; confirm where they are expected to come from.
    websocket = manager.active_connections[0] if manager.active_connections else None
    if not websocket:
        return JSONResponse(status_code=400, content={"message": "No active WebSocket connection"})

    report = await run_research_task("Is AI in a hype cycle?", websocket, stream_output)
    return {"report": report}
|
235 |
+
|
236 |
+
|
237 |
+
async def handle_websocket_communication(websocket, manager):
    """Receive text messages forever and dispatch them by prefix.

    Recognized prefixes, checked in this order: ``start``, ``human_feedback``
    and ``chat``. The loop ends only when ``receive_text`` raises.
    """
    while True:
        incoming = await websocket.receive_text()
        if incoming.startswith("start"):
            await handle_start_command(websocket, incoming, manager)
        elif incoming.startswith("human_feedback"):
            await handle_human_feedback(incoming)
        elif incoming.startswith("chat"):
            await handle_chat(websocket, incoming, manager)
        else:
            print("Error: Unknown command or not enough parameters provided.")
|
248 |
+
|
249 |
+
|
250 |
+
def extract_command_data(json_data: Dict) -> tuple:
    """Pull the start-command fields out of the parsed payload.

    Returns:
        ``(task, report_type, source_urls, document_urls, tone, headers,
        report_source)``. Missing keys yield ``None``, except ``headers``,
        which defaults to an empty dict.
    """
    lookup = json_data.get
    task = lookup("task")
    report_type = lookup("report_type")
    source_urls = lookup("source_urls")
    document_urls = lookup("document_urls")
    tone = lookup("tone")
    headers = lookup("headers", {})
    report_source = lookup("report_source")
    return (task, report_type, source_urls, document_urls, tone, headers, report_source)
|