Zulelee commited on
Commit
57b8424
·
1 Parent(s): 11607d7

Upload 62 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .dockerignore +1 -0
  2. .github/dependabot.yml +15 -0
  3. .github/workflows/docker-bulid.yml +24 -0
  4. .gitignore +10 -0
  5. CODE_OF_CONDUCT.md +128 -0
  6. CONTRIBUTING.md +42 -0
  7. Dockerfile +29 -0
  8. LICENSE +21 -0
  9. __pycache__/main.cpython-311.pyc +0 -0
  10. actions/__pycache__/web_scrape.cpython-311.pyc +0 -0
  11. actions/__pycache__/web_search.cpython-311.pyc +0 -0
  12. actions/web_scrape.py +220 -0
  13. actions/web_search.py +25 -0
  14. agent/__pycache__/llm_utils.cpython-311.pyc +0 -0
  15. agent/__pycache__/prompts.cpython-311.pyc +0 -0
  16. agent/__pycache__/research_agent.cpython-311.pyc +0 -0
  17. agent/__pycache__/run.cpython-311.pyc +0 -0
  18. agent/llm_utils.py +124 -0
  19. agent/prompts.py +137 -0
  20. agent/research_agent.py +188 -0
  21. agent/run.py +60 -0
  22. client/index.html +135 -0
  23. client/scripts.js +150 -0
  24. client/static/academicResearchAgentAvatar.png +0 -0
  25. client/static/businessAnalystAgentAvatar.png +0 -0
  26. client/static/computerSecurityanalystAvatar.png +0 -0
  27. client/static/defaultAgentAvatar.JPG +0 -0
  28. client/static/favicon.ico +0 -0
  29. client/static/financeAgentAvatar.png +0 -0
  30. client/static/mathAgentAvatar.png +0 -0
  31. client/static/travelAgentAvatar.png +0 -0
  32. client/styles.css +131 -0
  33. config/__init__.py +9 -0
  34. config/__pycache__/__init__.cpython-311.pyc +0 -0
  35. config/__pycache__/config.cpython-311.pyc +0 -0
  36. config/__pycache__/singleton.cpython-311.pyc +0 -0
  37. config/config.py +83 -0
  38. config/singleton.py +24 -0
  39. docker-compose.yml +9 -0
  40. js/overlay.js +29 -0
  41. main.py +71 -0
  42. outputs/540826e12734403b01e368d80fefc9cae0571027/research-Papers on understanding learning mechanisms through chain of thought prompting.txt +4 -0
  43. outputs/540826e12734403b01e368d80fefc9cae0571027/research-Scientific articles on how thought promptings affect decision making - October 11, 2023.txt +4 -0
  44. outputs/540826e12734403b01e368d80fefc9cae0571027/research-The cognitive process behind chain of thought prompting.txt +4 -0
  45. outputs/540826e12734403b01e368d80fefc9cae0571027/research_report.md +38 -0
  46. outputs/540826e12734403b01e368d80fefc9cae0571027/research_report.pdf +0 -0
  47. permchain_example/README.md +33 -0
  48. permchain_example/editor_actors/editor.py +51 -0
  49. permchain_example/research_team.py +75 -0
  50. permchain_example/researcher.py +36 -0
.dockerignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .git
.github/dependabot.yml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for all configuration options:
4
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+
6
+ version: 2
7
+ updates:
8
+ - package-ecosystem: "pip" # See documentation for possible values
9
+ directory: "/" # Location of package manifests
10
+ schedule:
11
+ interval: "weekly"
12
+ - package-ecosystem: "docker"
13
+ directory: "/"
14
+ schedule:
15
+ interval: "weekly"
.github/workflows/docker-bulid.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: GitHub Actions Workflow
2
+ run-name: ${{ github.actor }} has started docker build workflow.
3
+ on:
4
+ pull_request:
5
+ types: [opened, edited, ready_for_review]
6
+ jobs:
7
+ docker:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - name: Git checkout
11
+ uses: actions/checkout@master
12
+ - name: Set up QEMU
13
+ uses: docker/setup-qemu-action@v2
14
+ - name: Set up Docker Buildx
15
+ uses: docker/setup-buildx-action@v2
16
+ with:
17
+ driver: docker
18
+ - name: Build Dockerfile
19
+ uses: docker/build-push-action@v4
20
+ with:
21
+ push: false
22
+ tags: assafelovic/gpt-researcher:latest
23
+ file: Dockerfile
24
+
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ #Ignore env containing secrets
2
+ .env
3
+ .envrc
4
+ #Ignore Virtual Env
5
+ env/
6
+ .venv/
7
+ #Ignore generated outputs
8
+ outputs/
9
+ #Ignore pycache
10
+ **/__pycache__/
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, or sexual identity
10
+ and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ * Demonstrating empathy and kindness toward other people
21
+ * Being respectful of differing opinions, viewpoints, and experiences
22
+ * Giving and gracefully accepting constructive feedback
23
+ * Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ * Focusing on what is best not just for us as individuals, but for the
26
+ overall community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ * The use of sexualized language or imagery, and sexual attention or
31
+ advances of any kind
32
+ * Trolling, insulting or derogatory comments, and personal or political attacks
33
+ * Public or private harassment
34
+ * Publishing others' private information, such as a physical or email
35
+ address, without their explicit permission
36
+ * Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement Responsibilities
40
+
41
+ Community leaders are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Community leaders have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official e-mail address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the community leaders responsible for enforcement at
63
64
+ All complaints will be reviewed and investigated promptly and fairly.
65
+
66
+ All community leaders are obligated to respect the privacy and security of the
67
+ reporter of any incident.
68
+
69
+ ## Enforcement Guidelines
70
+
71
+ Community leaders will follow these Community Impact Guidelines in determining
72
+ the consequences for any action they deem in violation of this Code of Conduct:
73
+
74
+ ### 1. Correction
75
+
76
+ **Community Impact**: Use of inappropriate language or other behavior deemed
77
+ unprofessional or unwelcome in the community.
78
+
79
+ **Consequence**: A private, written warning from community leaders, providing
80
+ clarity around the nature of the violation and an explanation of why the
81
+ behavior was inappropriate. A public apology may be requested.
82
+
83
+ ### 2. Warning
84
+
85
+ **Community Impact**: A violation through a single incident or series
86
+ of actions.
87
+
88
+ **Consequence**: A warning with consequences for continued behavior. No
89
+ interaction with the people involved, including unsolicited interaction with
90
+ those enforcing the Code of Conduct, for a specified period of time. This
91
+ includes avoiding interactions in community spaces as well as external channels
92
+ like social media. Violating these terms may lead to a temporary or
93
+ permanent ban.
94
+
95
+ ### 3. Temporary Ban
96
+
97
+ **Community Impact**: A serious violation of community standards, including
98
+ sustained inappropriate behavior.
99
+
100
+ **Consequence**: A temporary ban from any sort of interaction or public
101
+ communication with the community for a specified period of time. No public or
102
+ private interaction with the people involved, including unsolicited interaction
103
+ with those enforcing the Code of Conduct, is allowed during this period.
104
+ Violating these terms may lead to a permanent ban.
105
+
106
+ ### 4. Permanent Ban
107
+
108
+ **Community Impact**: Demonstrating a pattern of violation of community
109
+ standards, including sustained inappropriate behavior, harassment of an
110
+ individual, or aggression toward or disparagement of classes of individuals.
111
+
112
+ **Consequence**: A permanent ban from any sort of public interaction within
113
+ the community.
114
+
115
+ ## Attribution
116
+
117
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
118
+ version 2.0, available at
119
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
120
+
121
+ Community Impact Guidelines were inspired by [Mozilla's code of conduct
122
+ enforcement ladder](https://github.com/mozilla/diversity).
123
+
124
+ [homepage]: https://www.contributor-covenant.org
125
+
126
+ For answers to common questions about this code of conduct, see the FAQ at
127
+ https://www.contributor-covenant.org/faq. Translations are available at
128
+ https://www.contributor-covenant.org/translations.
CONTRIBUTING.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to GPT Researcher
2
+ First off, we'd like to welcome and thank you for your interest and effort in contributing to our open source project ❤️. Contributions of all forms are welcome, from new features and bug fixes, to documentation and more.
3
+
4
+ We are on a mission to build the #1 AI agent for comprehensive, unbiased, and factual research online. And we need your support to achieve this grand vision.
5
+
6
+ Please take a moment to review this document in order to make the contribution process easy and effective for everyone involved.
7
+
8
+ ## Reporting Issues
9
+
10
+ If you come across any issue or have an idea for an improvement, don't hesitate to create an issue on GitHub. Describe your problem in sufficient detail, providing as much relevant information as possible. This way, we can reproduce the issue before attempting to fix it or respond appropriately.
11
+
12
+ ## Contributing Code
13
+
14
+ 1. **Fork the repository and create your branch from `master`.**
15
+ If it's not an urgent bug fix, you should branch from `master` and work on the feature or fix in there.
16
+
17
+ 2. **Conduct your changes.**
18
+ Make your changes following best practices for coding in the project's language.
19
+
20
+ 3. **Test your changes.**
21
+ Make sure your changes pass all the tests if there are any. If the project doesn't have automated testing infrastructure, test your changes manually to confirm they behave as expected.
22
+
23
+ 4. **Follow the coding style.**
24
+ Ensure your code adheres to the coding conventions used throughout the project, that includes indentation, accurate comments, etc.
25
+
26
+ 5. **Commit your changes.**
27
+ Make your git commits informative and concise. This is very helpful for others when they look at the git log.
28
+
29
+ 6. **Push to your fork and submit a pull request.**
30
+ When your work is ready and passes tests, push your branch to your fork of the repository and submit a pull request from there.
31
+
32
+ 7. **Pat your back and wait for the review.**
33
+ Your work is done, congratulations! Now sit tight. The project maintainers will review your submission as soon as possible. They might suggest changes or ask for improvements. Both constructive conversation and patience are key to the collaboration process.
34
+
35
+
36
+ ## Documentation
37
+
38
+ If you would like to contribute to the project's documentation, please follow the same steps: fork the repository, make your changes, test them, and submit a pull request.
39
+
40
+ Documentation is a vital part of any software. It's not just about having good code. Ensuring that the users and contributors understand what's going on, how to use the software or how to contribute, is crucial.
41
+
42
+ We're grateful for all our contributors, and we look forward to building the world's leading AI research agent hand-in-hand with you. Let's harness the power of Open Source and AI to change the world together!
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11.4-slim-bullseye as install-browser
2
+
3
+ RUN apt-get update \
4
+ && apt-get satisfy -y \
5
+ "chromium, chromium-driver (>= 115.0)" \
6
+ && chromium --version && chromedriver --version
7
+
8
+ FROM install-browser as gpt-researcher-install
9
+
10
+ ENV PIP_ROOT_USER_ACTION=ignore
11
+
12
+ RUN mkdir /usr/src/app
13
+ WORKDIR /usr/src/app
14
+
15
+ COPY ./requirements.txt ./requirements.txt
16
+ RUN pip install -r requirements.txt
17
+
18
+ FROM gpt-researcher-install AS gpt-researcher
19
+
20
+ RUN useradd -ms /bin/bash gpt-researcher \
21
+ && chown -R gpt-researcher:gpt-researcher /usr/src/app
22
+
23
+ USER gpt-researcher
24
+
25
+ COPY --chown=gpt-researcher:gpt-researcher ./ ./
26
+
27
+ EXPOSE 8000
28
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
29
+
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Assaf Elovic
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
__pycache__/main.cpython-311.pyc ADDED
Binary file (4.35 kB). View file
 
actions/__pycache__/web_scrape.cpython-311.pyc ADDED
Binary file (11.1 kB). View file
 
actions/__pycache__/web_search.cpython-311.pyc ADDED
Binary file (1.31 kB). View file
 
actions/web_scrape.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Selenium web scraping module."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import asyncio
6
+ from pathlib import Path
7
+ from sys import platform
8
+
9
+ from bs4 import BeautifulSoup
10
+ from webdriver_manager.chrome import ChromeDriverManager
11
+ from webdriver_manager.firefox import GeckoDriverManager
12
+ from selenium import webdriver
13
+ from selenium.webdriver.chrome.service import Service
14
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
15
+ from selenium.webdriver.common.by import By
16
+ from selenium.webdriver.firefox.options import Options as FirefoxOptions
17
+ from selenium.webdriver.remote.webdriver import WebDriver
18
+ from selenium.webdriver.safari.options import Options as SafariOptions
19
+ from selenium.webdriver.support import expected_conditions as EC
20
+ from selenium.webdriver.support.wait import WebDriverWait
21
+ from fastapi import WebSocket
22
+
23
+ import processing.text as summary
24
+
25
+ from config import Config
26
+ from processing.html import extract_hyperlinks, format_hyperlinks
27
+
28
+ from concurrent.futures import ThreadPoolExecutor
29
+
30
+ executor = ThreadPoolExecutor()
31
+
32
+ FILE_DIR = Path(__file__).parent.parent
33
+ CFG = Config()
34
+
35
+
36
+ async def async_browse(url: str, question: str, websocket: WebSocket) -> str:
37
+ """Browse a website and return the answer and links to the user
38
+
39
+ Args:
40
+ url (str): The url of the website to browse
41
+ question (str): The question asked by the user
42
+ websocket (WebSocketManager): The websocket manager
43
+
44
+ Returns:
45
+ str: The answer and links to the user
46
+ """
47
+ loop = asyncio.get_event_loop()
48
+ executor = ThreadPoolExecutor(max_workers=8)
49
+
50
+ print(f"Scraping url {url} with question {question}")
51
+ await websocket.send_json(
52
+ {"type": "logs", "output": f"🔎 Browsing the {url} for relevant about: {question}..."})
53
+
54
+ try:
55
+ driver, text = await loop.run_in_executor(executor, scrape_text_with_selenium, url)
56
+ await loop.run_in_executor(executor, add_header, driver)
57
+ summary_text = await loop.run_in_executor(executor, summary.summarize_text, url, text, question, driver)
58
+
59
+ await websocket.send_json(
60
+ {"type": "logs", "output": f"📝 Information gathered from url {url}: {summary_text}"})
61
+
62
+ return f"Information gathered from url {url}: {summary_text}"
63
+ except Exception as e:
64
+ print(f"An error occurred while processing the url {url}: {e}")
65
+ return f"Error processing the url {url}: {e}"
66
+
67
+
68
+
69
+ def browse_website(url: str, question: str) -> tuple[str, WebDriver]:
70
+ """Browse a website and return the answer and links to the user
71
+
72
+ Args:
73
+ url (str): The url of the website to browse
74
+ question (str): The question asked by the user
75
+
76
+ Returns:
77
+ Tuple[str, WebDriver]: The answer and links to the user and the webdriver
78
+ """
79
+
80
+ if not url:
81
+ return "A URL was not specified, cancelling request to browse website.", None
82
+
83
+ driver, text = scrape_text_with_selenium(url)
84
+ add_header(driver)
85
+ summary_text = summary.summarize_text(url, text, question, driver)
86
+
87
+ links = scrape_links_with_selenium(driver, url)
88
+
89
+ # Limit links to 5
90
+ if len(links) > 5:
91
+ links = links[:5]
92
+
93
+ # write_to_file('research-{0}.txt'.format(url), summary_text + "\nSource Links: {0}\n\n".format(links))
94
+
95
+ close_browser(driver)
96
+ return f"Answer gathered from website: {summary_text} \n \n Links: {links}", driver
97
+
98
+
99
+ def scrape_text_with_selenium(url: str) -> tuple[WebDriver, str]:
100
+ """Scrape text from a website using selenium
101
+
102
+ Args:
103
+ url (str): The url of the website to scrape
104
+
105
+ Returns:
106
+ Tuple[WebDriver, str]: The webdriver and the text scraped from the website
107
+ """
108
+ logging.getLogger("selenium").setLevel(logging.CRITICAL)
109
+
110
+ options_available = {
111
+ "chrome": ChromeOptions,
112
+ "safari": SafariOptions,
113
+ "firefox": FirefoxOptions,
114
+ }
115
+
116
+ options = options_available[CFG.selenium_web_browser]()
117
+ options.add_argument(f"user-agent={CFG.user_agent}")
118
+ options.add_argument('--headless')
119
+ options.add_argument("--enable-javascript")
120
+
121
+ if CFG.selenium_web_browser == "firefox":
122
+ service = Service(executable_path=GeckoDriverManager().install())
123
+ driver = webdriver.Firefox(
124
+ service=service, options=options
125
+ )
126
+ elif CFG.selenium_web_browser == "safari":
127
+ # Requires a bit more setup on the users end
128
+ # See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari
129
+ driver = webdriver.Safari(options=options)
130
+ else:
131
+ if platform == "linux" or platform == "linux2":
132
+ options.add_argument("--disable-dev-shm-usage")
133
+ options.add_argument("--remote-debugging-port=9222")
134
+ options.add_argument("--no-sandbox")
135
+ options.add_experimental_option(
136
+ "prefs", {"download_restrictions": 3}
137
+ )
138
+ driver = webdriver.Chrome(options=options)
139
+
140
+ print(f"scraping url {url}...")
141
+ driver.get(url)
142
+
143
+ WebDriverWait(driver, 10).until(
144
+ EC.presence_of_element_located((By.TAG_NAME, "body"))
145
+ )
146
+
147
+ # Get the HTML content directly from the browser's DOM
148
+ page_source = driver.execute_script("return document.body.outerHTML;")
149
+ soup = BeautifulSoup(page_source, "html.parser")
150
+
151
+ for script in soup(["script", "style"]):
152
+ script.extract()
153
+
154
+ # text = soup.get_text()
155
+ text = get_text(soup)
156
+
157
+ lines = (line.strip() for line in text.splitlines())
158
+ chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
159
+ text = "\n".join(chunk for chunk in chunks if chunk)
160
+ return driver, text
161
+
162
+
163
+ def get_text(soup):
164
+ """Get the text from the soup
165
+
166
+ Args:
167
+ soup (BeautifulSoup): The soup to get the text from
168
+
169
+ Returns:
170
+ str: The text from the soup
171
+ """
172
+ text = ""
173
+ tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'p']
174
+ for element in soup.find_all(tags): # Find all the <p> elements
175
+ text += element.text + "\n\n"
176
+ return text
177
+
178
+
179
+ def scrape_links_with_selenium(driver: WebDriver, url: str) -> list[str]:
180
+ """Scrape links from a website using selenium
181
+
182
+ Args:
183
+ driver (WebDriver): The webdriver to use to scrape the links
184
+
185
+ Returns:
186
+ List[str]: The links scraped from the website
187
+ """
188
+ page_source = driver.page_source
189
+ soup = BeautifulSoup(page_source, "html.parser")
190
+
191
+ for script in soup(["script", "style"]):
192
+ script.extract()
193
+
194
+ hyperlinks = extract_hyperlinks(soup, url)
195
+
196
+ return format_hyperlinks(hyperlinks)
197
+
198
+
199
+ def close_browser(driver: WebDriver) -> None:
200
+ """Close the browser
201
+
202
+ Args:
203
+ driver (WebDriver): The webdriver to close
204
+
205
+ Returns:
206
+ None
207
+ """
208
+ driver.quit()
209
+
210
+
211
+ def add_header(driver: WebDriver) -> None:
212
+ """Add a header to the website
213
+
214
+ Args:
215
+ driver (WebDriver): The webdriver to use to add the header
216
+
217
+ Returns:
218
+ None
219
+ """
220
+ driver.execute_script(open(f"{FILE_DIR}/js/overlay.js", "r").read())
actions/web_search.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import json
3
+ from duckduckgo_search import DDGS
4
+
5
+ ddgs = DDGS()
6
+
7
+ def web_search(query: str, num_results: int = 4) -> str:
8
+ """Useful for general internet search queries."""
9
+ print("Searching with query {0}...".format(query))
10
+ search_results = []
11
+ if not query:
12
+ return json.dumps(search_results)
13
+
14
+ results = ddgs.text(query)
15
+ if not results:
16
+ return json.dumps(search_results)
17
+
18
+ total_added = 0
19
+ for j in results:
20
+ search_results.append(j)
21
+ total_added += 1
22
+ if total_added >= num_results:
23
+ break
24
+
25
+ return json.dumps(search_results, ensure_ascii=False, indent=4)
agent/__pycache__/llm_utils.cpython-311.pyc ADDED
Binary file (5.43 kB). View file
 
agent/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (10.2 kB). View file
 
agent/__pycache__/research_agent.cpython-311.pyc ADDED
Binary file (11.1 kB). View file
 
agent/__pycache__/run.cpython-311.pyc ADDED
Binary file (4.01 kB). View file
 
agent/llm_utils.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+
5
+ from fastapi import WebSocket
6
+ import time
7
+
8
+ import openai
9
+ from langchain.adapters import openai as lc_openai
10
+ from colorama import Fore, Style
11
+ from openai.error import APIError, RateLimitError
12
+
13
+ from agent.prompts import auto_agent_instructions
14
+ from config import Config
15
+
16
+ CFG = Config()
17
+
18
+ openai.api_key = CFG.openai_api_key
19
+
20
+ from typing import Optional
21
+ import logging
22
+
23
+ def create_chat_completion(
24
+ messages: list, # type: ignore
25
+ model: Optional[str] = None,
26
+ temperature: float = CFG.temperature,
27
+ max_tokens: Optional[int] = None,
28
+ stream: Optional[bool] = False,
29
+ websocket: WebSocket | None = None,
30
+ ) -> str:
31
+ """Create a chat completion using the OpenAI API
32
+ Args:
33
+ messages (list[dict[str, str]]): The messages to send to the chat completion
34
+ model (str, optional): The model to use. Defaults to None.
35
+ temperature (float, optional): The temperature to use. Defaults to 0.9.
36
+ max_tokens (int, optional): The max tokens to use. Defaults to None.
37
+ stream (bool, optional): Whether to stream the response. Defaults to False.
38
+ Returns:
39
+ str: The response from the chat completion
40
+ """
41
+
42
+ # validate input
43
+ if model is None:
44
+ raise ValueError("Model cannot be None")
45
+ if max_tokens is not None and max_tokens > 8001:
46
+ raise ValueError(f"Max tokens cannot be more than 8001, but got {max_tokens}")
47
+ if stream and websocket is None:
48
+ raise ValueError("Websocket cannot be None when stream is True")
49
+
50
+ # create response
51
+ for attempt in range(10): # maximum of 10 attempts
52
+ response = send_chat_completion_request(
53
+ messages, model, temperature, max_tokens, stream, websocket
54
+ )
55
+ return response
56
+
57
+ logging.error("Failed to get response from OpenAI API")
58
+ raise RuntimeError("Failed to get response from OpenAI API")
59
+
60
+
61
+ def send_chat_completion_request(
62
+ messages, model, temperature, max_tokens, stream, websocket
63
+ ):
64
+ if not stream:
65
+ result = lc_openai.ChatCompletion.create(
66
+ model=model, # Change model here to use different models
67
+ messages=messages,
68
+ temperature=temperature,
69
+ max_tokens=max_tokens,
70
+ provider=CFG.llm_provider, # Change provider here to use a different API
71
+ )
72
+ return result["choices"][0]["message"]["content"]
73
+ else:
74
+ return stream_response(model, messages, temperature, max_tokens, websocket)
75
+
76
+
77
+ async def stream_response(model, messages, temperature, max_tokens, websocket):
78
+ paragraph = ""
79
+ response = ""
80
+ print(f"streaming response...")
81
+
82
+ for chunk in lc_openai.ChatCompletion.create(
83
+ model=model,
84
+ messages=messages,
85
+ temperature=temperature,
86
+ max_tokens=max_tokens,
87
+ provider=CFG.llm_provider,
88
+ stream=True,
89
+ ):
90
+ content = chunk["choices"][0].get("delta", {}).get("content")
91
+ if content is not None:
92
+ response += content
93
+ paragraph += content
94
+ if "\n" in paragraph:
95
+ await websocket.send_json({"type": "report", "output": paragraph})
96
+ paragraph = ""
97
+ print(f"streaming response complete")
98
+ return response
99
+
100
+
101
+ def choose_agent(task: str) -> dict:
102
+ """Determines what agent should be used
103
+ Args:
104
+ task (str): The research question the user asked
105
+ Returns:
106
+ agent - The agent that will be used
107
+ agent_role_prompt (str): The prompt for the agent
108
+ """
109
+ try:
110
+ response = create_chat_completion(
111
+ model=CFG.smart_llm_model,
112
+ messages=[
113
+ {"role": "system", "content": f"{auto_agent_instructions()}"},
114
+ {"role": "user", "content": f"task: {task}"}],
115
+ temperature=0,
116
+ )
117
+
118
+ return json.loads(response)
119
+ except Exception as e:
120
+ print(f"{Fore.RED}Error in choose_agent: {e}{Style.RESET_ALL}")
121
+ return {"agent": "Default Agent",
122
+ "agent_role_prompt": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."}
123
+
124
+
agent/prompts.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ def generate_agent_role_prompt(agent):
3
+ """ Generates the agent role prompt.
4
+ Args: agent (str): The type of the agent.
5
+ Returns: str: The agent role prompt.
6
+ """
7
+ prompts = {
8
+ "Finance Agent": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends.",
9
+ "Travel Agent": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights.",
10
+ "Academic Research Agent": "You are an AI academic research assistant. Your primary responsibility is to create thorough, academically rigorous, unbiased, and systematically organized reports on a given research topic, following the standards of scholarly work.",
11
+ "Business Analyst": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis.",
12
+ "Computer Security Analyst Agent": "You are an AI specializing in computer security analysis. Your principal duty is to generate comprehensive, meticulously detailed, impartial, and systematically structured reports on computer security topics. This includes Exploits, Techniques, Threat Actors, and Advanced Persistent Threat (APT) Groups. All produced reports should adhere to the highest standards of scholarly work and provide in-depth insights into the complexities of computer security.",
13
+ "Default Agent": "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text."
14
+ }
15
+
16
+ return prompts.get(agent, "No such agent")
17
+
18
+
19
+ def generate_report_prompt(question, research_summary):
20
+ """ Generates the report prompt for the given question and research summary.
21
+ Args: question (str): The question to generate the report prompt for
22
+ research_summary (str): The research summary to generate the report prompt for
23
+ Returns: str: The report prompt for the given question and research summary
24
+ """
25
+
26
+ return f'"""{research_summary}""" Using the above information, answer the following'\
27
+ f' question or topic: "{question}" in a detailed report --'\
28
+ " The report should focus on the answer to the question, should be well structured, informative," \
29
+ " in depth, with facts and numbers if available, a minimum of 1,200 words and with markdown syntax and apa format.\n "\
30
+ "You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.\n" \
31
+ f"Write all used source urls at the end of the report in apa format.\n " \
32
+ f"Assume that the current date is {datetime.now().strftime('%B %d, %Y')}"
33
+
34
+ def generate_search_queries_prompt(question):
35
+ """ Generates the search queries prompt for the given question.
36
+ Args: question (str): The question to generate the search queries prompt for
37
+ Returns: str: The search queries prompt for the given question
38
+ """
39
+
40
+ return f'Write 3 google search queries to search online that form an objective opinion from the following: "{question}"'\
41
+ f'Use the current date if needed: {datetime.now().strftime("%B %d, %Y")}.\n' \
42
+ f'You must respond with a list of strings in the following format: ["query 1", "query 2", "query 3"].'
43
+
44
+
45
+ def generate_resource_report_prompt(question, research_summary):
46
+ """Generates the resource report prompt for the given question and research summary.
47
+
48
+ Args:
49
+ question (str): The question to generate the resource report prompt for.
50
+ research_summary (str): The research summary to generate the resource report prompt for.
51
+
52
+ Returns:
53
+ str: The resource report prompt for the given question and research summary.
54
+ """
55
+ return f'"""{research_summary}""" Based on the above information, generate a bibliography recommendation report for the following' \
56
+ f' question or topic: "{question}". The report should provide a detailed analysis of each recommended resource,' \
57
+ ' explaining how each source can contribute to finding answers to the research question.' \
58
+ ' Focus on the relevance, reliability, and significance of each source.' \
59
+ ' Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax.' \
60
+ ' Include relevant facts, figures, and numbers whenever available.' \
61
+ ' The report should have a minimum length of 1,200 words.'
62
+
63
+
64
+ def generate_outline_report_prompt(question, research_summary):
65
+ """ Generates the outline report prompt for the given question and research summary.
66
+ Args: question (str): The question to generate the outline report prompt for
67
+ research_summary (str): The research summary to generate the outline report prompt for
68
+ Returns: str: The outline report prompt for the given question and research summary
69
+ """
70
+
71
+ return f'"""{research_summary}""" Using the above information, generate an outline for a research report in Markdown syntax'\
72
+ f' for the following question or topic: "{question}". The outline should provide a well-structured framework'\
73
+ ' for the research report, including the main sections, subsections, and key points to be covered.' \
74
+ ' The research report should be detailed, informative, in-depth, and a minimum of 1,200 words.' \
75
+ ' Use appropriate Markdown syntax to format the outline and ensure readability.'
76
+
77
+ def generate_concepts_prompt(question, research_summary):
78
+ """ Generates the concepts prompt for the given question.
79
+ Args: question (str): The question to generate the concepts prompt for
80
+ research_summary (str): The research summary to generate the concepts prompt for
81
+ Returns: str: The concepts prompt for the given question
82
+ """
83
+
84
+ return f'"""{research_summary}""" Using the above information, generate a list of 5 main concepts to learn for a research report'\
85
+ f' on the following question or topic: "{question}". The outline should provide a well-structured framework'\
86
+ 'You must respond with a list of strings in the following format: ["concepts 1", "concepts 2", "concepts 3", "concepts 4, concepts 5"]'
87
+
88
+
89
+ def generate_lesson_prompt(concept):
90
+ """
91
+ Generates the lesson prompt for the given question.
92
+ Args:
93
+ concept (str): The concept to generate the lesson prompt for.
94
+ Returns:
95
+ str: The lesson prompt for the given concept.
96
+ """
97
+
98
+ prompt = f'generate a comprehensive lesson about {concept} in Markdown syntax. This should include the definition'\
99
+ f'of {concept}, its historical background and development, its applications or uses in different'\
100
+ f'fields, and notable events or facts related to {concept}.'
101
+
102
+ return prompt
103
+
104
+ def get_report_by_type(report_type):
105
+ report_type_mapping = {
106
+ 'research_report': generate_report_prompt,
107
+ 'resource_report': generate_resource_report_prompt,
108
+ 'outline_report': generate_outline_report_prompt
109
+ }
110
+ return report_type_mapping[report_type]
111
+
112
+ def auto_agent_instructions():
113
+ return """
114
+ This task involves researching a given topic, regardless of its complexity or the availability of a definitive answer. The research is conducted by a specific agent, defined by its type and role, with each agent requiring distinct instructions.
115
+ Agent
116
+ The agent is determined by the field of the topic and the specific name of the agent that could be utilized to research the topic provided. Agents are categorized by their area of expertise, and each agent type is associated with a corresponding emoji.
117
+
118
+ examples:
119
+ task: "should I invest in apple stocks?"
120
+ response:
121
+ {
122
+ "agent": "💰 Finance Agent",
123
+ "agent_role_prompt: "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends."
124
+ }
125
+ task: "could reselling sneakers become profitable?"
126
+ response:
127
+ {
128
+ "agent": "📈 Business Analyst Agent",
129
+ "agent_role_prompt": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."
130
+ }
131
+ task: "what are the most interesting sites in Tel Aviv?"
132
+ response:
133
+ {
134
+ "agent: "🌍 Travel Agent",
135
+ "agent_role_prompt": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights."
136
+ }
137
+ """
agent/research_agent.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Description: Research assistant class that handles the research process for a given question.
2
+
3
+ # libraries
4
+ import asyncio
5
+ import json
6
+ import hashlib
7
+
8
+ from actions.web_search import web_search
9
+ from actions.web_scrape import async_browse
10
+ from processing.text import \
11
+ write_to_file, \
12
+ create_message, \
13
+ create_chat_completion, \
14
+ read_txt_files, \
15
+ write_md_to_pdf
16
+ from config import Config
17
+ from agent import prompts
18
+ import os
19
+ import string
20
+
21
+
22
+ CFG = Config()
23
+
24
+
25
+ class ResearchAgent:
26
+ def __init__(self, question, agent, agent_role_prompt, websocket=None):
27
+ """ Initializes the research assistant with the given question.
28
+ Args: question (str): The question to research
29
+ Returns: None
30
+ """
31
+
32
+ self.question = question
33
+ self.agent = agent
34
+ self.agent_role_prompt = agent_role_prompt if agent_role_prompt else prompts.generate_agent_role_prompt(agent)
35
+ self.visited_urls = set()
36
+ self.research_summary = ""
37
+ self.dir_path = f"./outputs/{hashlib.sha1(question.encode()).hexdigest()}"
38
+ self.websocket = websocket
39
+
40
+ async def stream_output(self, output):
41
+ if not self.websocket:
42
+ return print(output)
43
+ await self.websocket.send_json({"type": "logs", "output": output})
44
+
45
+
46
+ async def summarize(self, text, topic):
47
+ """ Summarizes the given text for the given topic.
48
+ Args: text (str): The text to summarize
49
+ topic (str): The topic to summarize the text for
50
+ Returns: str: The summarized text
51
+ """
52
+
53
+ messages = [create_message(text, topic)]
54
+ await self.stream_output(f"📝 Summarizing text for query: {text}")
55
+
56
+ return create_chat_completion(
57
+ model=CFG.fast_llm_model,
58
+ messages=messages,
59
+ )
60
+
61
+ async def get_new_urls(self, url_set_input):
62
+ """ Gets the new urls from the given url set.
63
+ Args: url_set_input (set[str]): The url set to get the new urls from
64
+ Returns: list[str]: The new urls from the given url set
65
+ """
66
+
67
+ new_urls = []
68
+ for url in url_set_input:
69
+ if url not in self.visited_urls:
70
+ await self.stream_output(f"✅ Adding source url to research: {url}\n")
71
+
72
+ self.visited_urls.add(url)
73
+ new_urls.append(url)
74
+
75
+ return new_urls
76
+
77
+ async def call_agent(self, action, stream=False, websocket=None):
78
+ messages = [{
79
+ "role": "system",
80
+ "content": self.agent_role_prompt
81
+ }, {
82
+ "role": "user",
83
+ "content": action,
84
+ }]
85
+ answer = create_chat_completion(
86
+ model=CFG.smart_llm_model,
87
+ messages=messages,
88
+ stream=stream,
89
+ websocket=websocket,
90
+ )
91
+ return answer
92
+
93
+ async def create_search_queries(self):
94
+ """ Creates the search queries for the given question.
95
+ Args: None
96
+ Returns: list[str]: The search queries for the given question
97
+ """
98
+ result = await self.call_agent(prompts.generate_search_queries_prompt(self.question))
99
+ await self.stream_output(f"🧠 I will conduct my research based on the following queries: {result}...")
100
+ return json.loads(result)
101
+
102
+ async def async_search(self, query):
103
+ """ Runs the async search for the given query.
104
+ Args: query (str): The query to run the async search for
105
+ Returns: list[str]: The async search for the given query
106
+ """
107
+ search_results = json.loads(web_search(query))
108
+ new_search_urls = self.get_new_urls([url.get("href") for url in search_results])
109
+
110
+ await self.stream_output(f"🌐 Browsing the following sites for relevant information: {new_search_urls}...")
111
+
112
+ # Create a list to hold the coroutine objects
113
+ tasks = [async_browse(url, query, self.websocket) for url in await new_search_urls]
114
+
115
+ # Gather the results as they become available
116
+ responses = await asyncio.gather(*tasks, return_exceptions=True)
117
+
118
+ return responses
119
+
120
+ async def run_search_summary(self, query):
121
+ """ Runs the search summary for the given query.
122
+ Args: query (str): The query to run the search summary for
123
+ Returns: str: The search summary for the given query
124
+ """
125
+
126
+ await self.stream_output(f"🔎 Running research for '{query}'...")
127
+
128
+ responses = await self.async_search(query)
129
+
130
+ result = "\n".join(responses)
131
+ os.makedirs(os.path.dirname(f"{self.dir_path}/research-{query}.txt"), exist_ok=True)
132
+ write_to_file(f"{self.dir_path}/research-{query}.txt", result)
133
+ return result
134
+
135
+ async def conduct_research(self):
136
+ """ Conducts the research for the given question.
137
+ Args: None
138
+ Returns: str: The research for the given question
139
+ """
140
+ self.research_summary = read_txt_files(self.dir_path) if os.path.isdir(self.dir_path) else ""
141
+
142
+ if not self.research_summary:
143
+ search_queries = await self.create_search_queries()
144
+ for query in search_queries:
145
+ research_result = await self.run_search_summary(query)
146
+ self.research_summary += f"{research_result}\n\n"
147
+
148
+ await self.stream_output(f"Total research words: {len(self.research_summary.split(' '))}")
149
+
150
+ return self.research_summary
151
+
152
+
153
+ async def create_concepts(self):
154
+ """ Creates the concepts for the given question.
155
+ Args: None
156
+ Returns: list[str]: The concepts for the given question
157
+ """
158
+ result = self.call_agent(prompts.generate_concepts_prompt(self.question, self.research_summary))
159
+
160
+ await self.stream_output(f"I will research based on the following concepts: {result}\n")
161
+ return json.loads(result)
162
+
163
+ async def write_report(self, report_type, websocket=None):
164
+ """ Writes the report for the given question.
165
+ Args: None
166
+ Returns: str: The report for the given question
167
+ """
168
+ report_type_func = prompts.get_report_by_type(report_type)
169
+ await self.stream_output(f"✍️ Writing {report_type} for research task: {self.question}...")
170
+
171
+ answer = await self.call_agent(report_type_func(self.question, self.research_summary),
172
+ stream=websocket is not None, websocket=websocket)
173
+ # if websocket is True than we are streaming gpt response, so we need to wait for the final response
174
+ final_report = await answer if websocket else answer
175
+
176
+ path = await write_md_to_pdf(report_type, self.dir_path, final_report)
177
+
178
+ return answer, path
179
+
180
+ async def write_lessons(self):
181
+ """ Writes lessons on essential concepts of the research.
182
+ Args: None
183
+ Returns: None
184
+ """
185
+ concepts = await self.create_concepts()
186
+ for concept in concepts:
187
+ answer = await self.call_agent(prompts.generate_lesson_prompt(concept), stream=True)
188
+ await write_md_to_pdf("Lesson", self.dir_path, answer)
agent/run.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import datetime
3
+
4
+ from typing import List, Dict
5
+ from fastapi import WebSocket
6
+ from config import check_openai_api_key
7
+ from agent.research_agent import ResearchAgent
8
+
9
+
10
+ class WebSocketManager:
11
+ def __init__(self):
12
+ self.active_connections: List[WebSocket] = []
13
+ self.sender_tasks: Dict[WebSocket, asyncio.Task] = {}
14
+ self.message_queues: Dict[WebSocket, asyncio.Queue] = {}
15
+
16
+ async def start_sender(self, websocket: WebSocket):
17
+ queue = self.message_queues[websocket]
18
+ while True:
19
+ message = await queue.get()
20
+ if websocket in self.active_connections:
21
+ await websocket.send_text(message)
22
+ else:
23
+ break
24
+
25
+ async def connect(self, websocket: WebSocket):
26
+ await websocket.accept()
27
+ self.active_connections.append(websocket)
28
+ self.message_queues[websocket] = asyncio.Queue()
29
+ self.sender_tasks[websocket] = asyncio.create_task(self.start_sender(websocket))
30
+
31
+ async def disconnect(self, websocket: WebSocket):
32
+ self.active_connections.remove(websocket)
33
+ self.sender_tasks[websocket].cancel()
34
+ del self.sender_tasks[websocket]
35
+ del self.message_queues[websocket]
36
+
37
+ async def start_streaming(self, task, report_type, agent, agent_role_prompt, websocket):
38
+ report, path = await run_agent(task, report_type, agent, agent_role_prompt, websocket)
39
+ return report, path
40
+
41
+
42
+ async def run_agent(task, report_type, agent, agent_role_prompt, websocket):
43
+ check_openai_api_key()
44
+
45
+ start_time = datetime.datetime.now()
46
+
47
+ # await websocket.send_json({"type": "logs", "output": f"Start time: {str(start_time)}\n\n"})
48
+
49
+ assistant = ResearchAgent(task, agent, agent_role_prompt, websocket)
50
+ await assistant.conduct_research()
51
+
52
+ report, path = await assistant.write_report(report_type, websocket)
53
+
54
+ await websocket.send_json({"type": "path", "output": path})
55
+
56
+ end_time = datetime.datetime.now()
57
+ await websocket.send_json({"type": "logs", "output": f"\nEnd time: {end_time}\n"})
58
+ await websocket.send_json({"type": "logs", "output": f"\nTotal run time: {end_time - start_time}\n"})
59
+
60
+ return report, path
client/index.html ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <title>GPT Researcher</title>
6
+ <meta name="description" content="A research assistant powered by GPT-4">
7
+ <meta name="viewport" content="width=device-width, initial-scale=1">
8
+ <link rel="icon" href="./static/favicon.ico">
9
+ <link rel="preconnect" href="https://fonts.googleapis.com">
10
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
+ <link href="https://fonts.googleapis.com/css2?family=Montserrat:wght@400;700&display=swap" rel="stylesheet">
12
+ <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.2/css/bootstrap.min.css" rel="stylesheet">
13
+ <link rel="stylesheet" href="/site/styles.css"/>
14
+ <style>
15
+ .avatar {
16
+ width: 60px;
17
+ height: 60px;
18
+ border-radius: 50%;
19
+ }
20
+
21
+ .agent-name {
22
+ text-align: center;
23
+ }
24
+
25
+ .agent-item {
26
+ display: flex;
27
+ flex-direction: column;
28
+ align-items: center;
29
+ }
30
+
31
+ .agent-choices {
32
+ display: none;
33
+ }
34
+
35
+ .btn-show {
36
+ display: none;
37
+ }
38
+ </style>
39
+ </head>
40
+
41
+ <body>
42
+
43
+ <section class="landing">
44
+ <div class="max-w-5xl mx-auto text-center">
45
+ <h1 class="text-4xl font-extrabold mx-auto lg:text-7xl">
46
+ Say Goodbye to <br>
47
+ <span
48
+ style="background-image:linear-gradient(to right, #9867F0, #ED4E50); -webkit-background-clip: text; -webkit-text-fill-color: transparent;">Hours
49
+ of Research</span>
50
+ </h1>
51
+ <p class="max-w-5xl mx-auto text-gray-600 mt-8" style="font-size:20px">
52
+ Say Hello to GPT Researcher, your AI mate for rapid insights and comprehensive research. GPT Researcher
53
+ takes care of everything from accurate source gathering to organization of research results - all in one
54
+ platform designed to make your research process a breeze.
55
+ </p>
56
+ <a href="#form" class="btn btn-primary">Get Started</a>
57
+ </div>
58
+ </section>
59
+
60
+ <main class="container" id="form">
61
+ <div class="agent-item"><img src="/static/defaultAgentAvatar.JPG" class="avatar"
62
+ alt="Auto Agent"></div>
63
+ <form method="POST" class="mt-3" onsubmit="GPTResearcher.startResearch(); return false;">
64
+ <div class="form-group">
65
+ <label for="task" class="agent-question">What would you like me to research next?</label>
66
+ <input type="text" id="task" name="task" class="form-control" required>
67
+ <input type="radio" name="agent" id="autoAgent" value="Auto Agent" checked hidden>
68
+ </div>
69
+ <div class="form-group">
70
+ <div class="row">
71
+
72
+
73
+ </div>
74
+ <button type="button" id="btnShowAuto" class="btn btn-secondary mt-3 btn-show">Auto Agent</button>
75
+ </div>
76
+ <div class="form-group">
77
+ <label for="report_type" class="agent-question">What type of report would you like me to generate?</label>
78
+ <select name="report_type" class="form-control" required>
79
+ <option value="research_report">Research Report</option>
80
+ <option value="resource_report">Resource Report</option>
81
+ <option value="outline_report">Outline Report</option>
82
+ </select>
83
+ </div>
84
+ <input type="submit" value="Research" class="btn btn-primary button-padding">
85
+ </form>
86
+
87
+ <div class="margin-div">
88
+ <h2>Agent Output</h2>
89
+ <p class="mt-2 text-left" style="font-size: 0.8rem;">An agent tailored specifically to your task
90
+ will be generated to provide the most precise and relevant research results.</p>
91
+ <div id="output"></div>
92
+ </div>
93
+ <div class="margin-div">
94
+ <h2>Research Report</h2>
95
+ <div id="reportContainer"></div>
96
+ <div id="reportActions">
97
+ <div class="alert alert-info" role="alert" id="status"></div>
98
+ <a onclick="GPTResearcher.copyToClipboard()" class="btn btn-secondary mt-3">Copy to clipboard</button>
99
+ <a id="downloadLink" href="#" class="btn btn-secondary mt-3" target="_blank">Download as PDF</a>
100
+ </div>
101
+ </div>
102
+ </main>
103
+
104
+ <footer>
105
+ <p>GPT Researcher &copy; 2023 | <a target="_blank" href="https://github.com/assafelovic/gpt-researcher">GitHub
106
+ Page</a></p>
107
+ </footer>
108
+
109
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/showdown/1.9.1/showdown.min.js"></script>
110
+ <script src="/site/scripts.js"></script>
111
+ <script>
112
+ // const btnChoose = document.getElementById('btnChoose');
113
+ const btnShowAuto = document.getElementById('btnShowAuto');
114
+ const autoAgentDiv = document.getElementById('autoAgentDiv');
115
+ const agentChoices = document.getElementsByClassName('agent-choices');
116
+
117
+ /**
118
+ btnChoose.addEventListener('click', function () {
119
+ btnShowAuto.style.display = 'inline-block';
120
+ btnChoose.style.display = 'none';
121
+ autoAgentDiv.style.display = 'none';
122
+ agentChoices[0].style.display = 'flex';
123
+ });
124
+ **/
125
+
126
+ btnShowAuto.addEventListener('click', function () {
127
+ btnShowAuto.style.display = 'none';
128
+ btnChoose.style.display = 'inline-block';
129
+ autoAgentDiv.style.display = 'flex';
130
+ agentChoices[0].style.display = 'none';
131
+ });
132
+ </script>
133
+ </body>
134
+
135
+ </html>
client/scripts.js ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const GPTResearcher = (() => {
2
+ const init = () => {
3
+ // Not sure, but I think it would be better to add event handlers here instead of in the HTML
4
+ //document.getElementById("startResearch").addEventListener("click", startResearch);
5
+ //document.getElementById("copyToClipboard").addEventListener("click", copyToClipboard);
6
+
7
+ updateState("initial");
8
+ }
9
+
10
+ const startResearch = () => {
11
+ document.getElementById("output").innerHTML = "";
12
+ document.getElementById("reportContainer").innerHTML = "";
13
+ updateState("in_progress")
14
+
15
+ addAgentResponse({ output: "🤔 Thinking about research questions for the task..." });
16
+
17
+ listenToSockEvents();
18
+ };
19
+
20
+ const listenToSockEvents = () => {
21
+ const { protocol, host, pathname } = window.location;
22
+ const ws_uri = `${protocol === 'https:' ? 'wss:' : 'ws:'}//${host}${pathname}ws`;
23
+ const converter = new showdown.Converter();
24
+ const socket = new WebSocket(ws_uri);
25
+
26
+ socket.onmessage = (event) => {
27
+ const data = JSON.parse(event.data);
28
+ if (data.type === 'logs') {
29
+ addAgentResponse(data);
30
+ } else if (data.type === 'report') {
31
+ writeReport(data, converter);
32
+ } else if (data.type === 'path') {
33
+ updateState("finished")
34
+ updateDownloadLink(data);
35
+
36
+ }
37
+ };
38
+
39
+ socket.onopen = (event) => {
40
+ const task = document.querySelector('input[name="task"]').value;
41
+ const report_type = document.querySelector('select[name="report_type"]').value;
42
+ const agent = document.querySelector('input[name="agent"]:checked').value;
43
+
44
+ const requestData = {
45
+ task: task,
46
+ report_type: report_type,
47
+ agent: agent,
48
+ };
49
+
50
+ socket.send(`start ${JSON.stringify(requestData)}`);
51
+ };
52
+ };
53
+
54
+ const addAgentResponse = (data) => {
55
+ const output = document.getElementById("output");
56
+ output.innerHTML += '<div class="agent_response">' + data.output + '</div>';
57
+ output.scrollTop = output.scrollHeight;
58
+ output.style.display = "block";
59
+ updateScroll();
60
+ };
61
+
62
+ const writeReport = (data, converter) => {
63
+ const reportContainer = document.getElementById("reportContainer");
64
+ const markdownOutput = converter.makeHtml(data.output);
65
+ reportContainer.innerHTML += markdownOutput;
66
+ updateScroll();
67
+ };
68
+
69
+ const updateDownloadLink = (data) => {
70
+ const path = data.output;
71
+ document.getElementById("downloadLink").setAttribute("href", path);
72
+ };
73
+
74
+ const updateScroll = () => {
75
+ window.scrollTo(0, document.body.scrollHeight);
76
+ };
77
+
78
+ const copyToClipboard = () => {
79
+ const textarea = document.createElement('textarea');
80
+ textarea.id = 'temp_element';
81
+ textarea.style.height = 0;
82
+ document.body.appendChild(textarea);
83
+ textarea.value = document.getElementById('reportContainer').innerText;
84
+ const selector = document.querySelector('#temp_element');
85
+ selector.select();
86
+ document.execCommand('copy');
87
+ document.body.removeChild(textarea);
88
+ };
89
+
90
+ const updateState = (state) => {
91
+ var status = "";
92
+ switch (state) {
93
+ case "in_progress":
94
+ status = "Research in progress..."
95
+ setReportActionsStatus("disabled");
96
+ break;
97
+ case "finished":
98
+ status = "Research finished!"
99
+ setReportActionsStatus("enabled");
100
+ break;
101
+ case "error":
102
+ status = "Research failed!"
103
+ setReportActionsStatus("disabled");
104
+ break;
105
+ case "initial":
106
+ status = ""
107
+ setReportActionsStatus("hidden");
108
+ break;
109
+ default:
110
+ setReportActionsStatus("disabled");
111
+ }
112
+ document.getElementById("status").innerHTML = status;
113
+ if (document.getElementById("status").innerHTML == "") {
114
+ document.getElementById("status").style.display = "none";
115
+ } else {
116
+ document.getElementById("status").style.display = "block";
117
+ }
118
+ }
119
+
120
+ /**
121
+ * Shows or hides the download and copy buttons
122
+ * @param {string} status Takes "enabled", "disabled", or "hidden"; "hidden" behaves like "disabled" but also hides the actions div.
123
+ */
124
+ const setReportActionsStatus = (status) => {
125
+ const reportActions = document.getElementById("reportActions");
126
+ // Disable everything in reportActions until research is finished
127
+
128
+ if (status == "enabled") {
129
+ reportActions.querySelectorAll("a").forEach((link) => {
130
+ link.classList.remove("disabled");
131
+ link.removeAttribute('onclick');
132
+ reportActions.style.display = "block";
133
+ });
134
+ } else {
135
+ reportActions.querySelectorAll("a").forEach((link) => {
136
+ link.classList.add("disabled");
137
+ link.setAttribute('onclick', "return false;");
138
+ });
139
+ if (status == "hidden") {
140
+ reportActions.style.display = "none";
141
+ }
142
+ }
143
+ }
144
+
145
+ document.addEventListener("DOMContentLoaded", init);
146
+ return {
147
+ startResearch,
148
+ copyToClipboard,
149
+ };
150
+ })();
client/static/academicResearchAgentAvatar.png ADDED
client/static/businessAnalystAgentAvatar.png ADDED
client/static/computerSecurityanalystAvatar.png ADDED
client/static/defaultAgentAvatar.JPG ADDED
client/static/favicon.ico ADDED
client/static/financeAgentAvatar.png ADDED
client/static/mathAgentAvatar.png ADDED
client/static/travelAgentAvatar.png ADDED
client/styles.css ADDED
@@ -0,0 +1,131 @@
1
+ @keyframes gradientBG {
2
+ 0% {background-position: 0% 50%;}
3
+ 50% {background-position: 100% 50%;}
4
+ 100% {background-position: 0% 50%;}
5
+ }
6
+
7
+ body {
8
+ font-family: 'Montserrat', sans-serif;
9
+ color: #fff;
10
+ line-height: 1.6;
11
+ background-size: 200% 200%;
12
+ background-image: linear-gradient(45deg, #151A2D, #2D284D, #151A2D);
13
+ animation: gradientBG 10s ease infinite;
14
+ }
15
+
16
+ .landing {
17
+ display: flex;
18
+ justify-content: center;
19
+ align-items: center;
20
+ height: 100vh;
21
+ text-align: center;
22
+ }
23
+
24
+ .landing h1 {
25
+ font-size: 3.5rem;
26
+ font-weight: 700;
27
+ margin-bottom: 2rem;
28
+ }
29
+
30
+ .landing p {
31
+ font-size: 1.5rem;
32
+ font-weight: 400;
33
+ max-width: 500px;
34
+ margin: auto;
35
+ margin-bottom: 2rem;
36
+ }
37
+
38
+ .container {
39
+ max-width: 900px;
40
+ margin: auto;
41
+ padding: 20px;
42
+ background-color: rgba(255, 255, 255, 0.1);
43
+ border-radius: 12px;
44
+ box-shadow: 0px 10px 25px rgba(0, 0, 0, 0.1);
45
+ transition: all .3s ease-in-out;
46
+ margin-bottom: 180px;
47
+ }
48
+
49
+ .container:hover {
50
+ transform: scale(1.01);
51
+ box-shadow: 0px 15px 30px rgba(0, 0, 0, 0.2);
52
+ }
53
+
54
+ input, select, #output, #reportContainer {
55
+ background-color: rgba(255,255,255,0.1);
56
+ border: none;
57
+ color: #fff;
58
+ transition: all .3s ease-in-out;
59
+ }
60
+
61
+ input:hover, input:focus, select:hover, select:focus {
62
+ background-color: #dfe4ea;
63
+ border: 1px solid rgba(255, 255, 255, 0.5);
64
+ box-shadow: 0px 4px 8px rgba(0, 0, 0, 0.1);
65
+ transition: all 0.3s ease-in-out;
66
+ }
67
+
68
+ .btn-primary {
69
+ background: linear-gradient(to right, #0062cc, #007bff);
70
+ border: none;
71
+ transition: all .3s ease-in-out;
72
+ }
73
+
74
+ .btn-secondary {
75
+ background: linear-gradient(to right, #6c757d, #6c757d);
76
+ border: none;
77
+ transition: all .3s ease-in-out;
78
+ }
79
+
80
+ .btn:hover {
81
+ opacity: 0.8;
82
+ transform: scale(1.1);
83
+ box-shadow: 0px 10px 20px rgba(0, 0, 0, 0.3);
84
+ }
85
+
86
+ .agent_question {
87
+ font-size: 1.2rem;
88
+ font-weight: 500;
89
+ margin-bottom: 0.5rem;
90
+ }
91
+
92
+ footer {
93
+ position: fixed;
94
+ left: 0;
95
+ bottom: 0;
96
+ width: 100%;
97
+ background: linear-gradient(to right, #151A2D, #111827);
98
+ color: white;
99
+ text-align: center;
100
+ padding: 10px 0;
101
+ }
102
+
103
+ .margin-div {
104
+ margin-top: 20px;
105
+ margin-bottom: 20px;
106
+ padding: 10px;
107
+ }
108
+
109
+ .agent_response {
110
+ background-color: #747d8c;
111
+ margin: 10px;
112
+ padding: 10px;
113
+ border-radius: 12px;
114
+ }
115
+
116
+ #output {
117
+ height: 300px;
118
+ overflow: auto;
119
+ padding: 10px;
120
+ margin-bottom: 10px;
121
+ margin-top: 10px;
122
+ }
123
+
124
+ #reportContainer {
125
+ background-color: rgba(255,255,255,0.1);
126
+ border: none;
127
+ color: #fff;
128
+ transition: all .3s ease-in-out;
129
+ padding: 10px;
130
+ border-radius: 12px;
131
+ }
config/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ from config.config import Config, check_openai_api_key
2
+ from config.singleton import AbstractSingleton, Singleton
3
+
4
+ __all__ = [
5
+ "check_openai_api_key",
6
+ "AbstractSingleton",
7
+ "Config",
8
+ "Singleton",
9
+ ]
config/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (411 Bytes). View file
 
config/__pycache__/config.cpython-311.pyc ADDED
Binary file (5.26 kB). View file
 
config/__pycache__/singleton.cpython-311.pyc ADDED
Binary file (1.46 kB). View file
 
config/config.py ADDED
@@ -0,0 +1,83 @@
1
+ """Configuration class to store the state of bools for different scripts access."""
2
+ import os
3
+
4
+ import openai
5
+ from colorama import Fore
6
+ from dotenv import load_dotenv
7
+
8
+ from config.singleton import Singleton
9
+
10
+ load_dotenv(verbose=True)
11
+
12
+
13
+ class Config(metaclass=Singleton):
14
+ """
15
+ Configuration class storing settings shared across the different scripts.
16
+ """
17
+
18
+ def __init__(self) -> None:
19
+ """Initialize the Config class"""
20
+ self.debug_mode = False
21
+ self.allow_downloads = False
22
+
23
+ self.selenium_web_browser = os.getenv("USE_WEB_BROWSER", "chrome")
24
+ self.llm_provider = os.getenv("LLM_PROVIDER", "ChatOpenAI")
25
+ self.fast_llm_model = os.getenv("FAST_LLM_MODEL", "gpt-3.5-turbo-16k")
26
+ self.smart_llm_model = os.getenv("SMART_LLM_MODEL", "gpt-4")
27
+ self.fast_token_limit = int(os.getenv("FAST_TOKEN_LIMIT", 2000))
28
+ self.smart_token_limit = int(os.getenv("SMART_TOKEN_LIMIT", 4000))
29
+ self.browse_chunk_max_length = int(os.getenv("BROWSE_CHUNK_MAX_LENGTH", 8192))
30
+ self.summary_token_limit = int(os.getenv("SUMMARY_TOKEN_LIMIT", 700))
31
+
32
+ self.openai_api_key = os.getenv("OPENAI_API_KEY")
33
+ self.temperature = float(os.getenv("TEMPERATURE", "1"))
34
+
35
+ self.user_agent = os.getenv(
36
+ "USER_AGENT",
37
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36"
38
+ " (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
39
+ )
40
+
41
+ self.memory_backend = os.getenv("MEMORY_BACKEND", "local")
42
+ # Initialize the OpenAI API client
43
+ openai.api_key = self.openai_api_key
44
+
45
+ def set_fast_llm_model(self, value: str) -> None:
46
+ """Set the fast LLM model value."""
47
+ self.fast_llm_model = value
48
+
49
+ def set_smart_llm_model(self, value: str) -> None:
50
+ """Set the smart LLM model value."""
51
+ self.smart_llm_model = value
52
+
53
+ def set_fast_token_limit(self, value: int) -> None:
54
+ """Set the fast token limit value."""
55
+ self.fast_token_limit = value
56
+
57
+ def set_smart_token_limit(self, value: int) -> None:
58
+ """Set the smart token limit value."""
59
+ self.smart_token_limit = value
60
+
61
+ def set_browse_chunk_max_length(self, value: int) -> None:
62
+ """Set the browse_website command chunk max length value."""
63
+ self.browse_chunk_max_length = value
64
+
65
+ def set_openai_api_key(self, value: str) -> None:
66
+ """Set the OpenAI API key value."""
67
+ self.openai_api_key = value
68
+
69
+ def set_debug_mode(self, value: bool) -> None:
70
+ """Set the debug mode value."""
71
+ self.debug_mode = value
72
+
73
+
74
+ def check_openai_api_key() -> None:
75
+ """Check if the OpenAI API key is set in config.py or as an environment variable."""
76
+ cfg = Config()
77
+ if not cfg.openai_api_key:
78
+ print(
79
+ Fore.RED
80
+ + "Please set your OpenAI API key in .env or as an environment variable."
81
+ )
82
+ print("You can get your key from https://platform.openai.com/account/api-keys")
83
+ exit(1)
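For reference, a minimal sketch (not part of the upload) of how `config/config.py` might be consumed from another script. It assumes the package layout above and an installed `openai`; the API key value is a placeholder. Because `Config` uses the `Singleton` metaclass, every call returns the same instance.

```python
import os

from config import Config, check_openai_api_key

# Environment variables are read when the first Config() is constructed,
# so set them (here with placeholders) before calling anything below.
os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")   # placeholder, not a real key
os.environ.setdefault("SMART_LLM_MODEL", "gpt-4")

check_openai_api_key()        # prints a hint and exits if no key is configured

cfg = Config()
cfg.set_debug_mode(True)

# Singleton metaclass: a "second" Config() returns the same object as the first.
assert Config() is cfg
print(cfg.smart_llm_model, cfg.fast_token_limit)
```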
config/singleton.py ADDED
@@ -0,0 +1,24 @@
1
+ """The singleton metaclass for ensuring only one instance of a class."""
2
+ import abc
3
+
4
+
5
+ class Singleton(abc.ABCMeta, type):
6
+ """
7
+ Singleton metaclass for ensuring only one instance of a class.
8
+ """
9
+
10
+ _instances = {}
11
+
12
+ def __call__(cls, *args, **kwargs):
13
+ """Call method for the singleton metaclass."""
14
+ if cls not in cls._instances:
15
+ cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
16
+ return cls._instances[cls]
17
+
18
+
19
+ class AbstractSingleton(abc.ABC, metaclass=Singleton):
20
+ """
21
+ Abstract singleton class for ensuring only one instance of a class.
22
+ """
23
+
24
+ pass
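A self-contained illustration of the metaclass above (the `Settings` class is made up for the demo): the first call constructs the instance, later calls return the cached one and skip `__init__`.

```python
import abc


class Singleton(abc.ABCMeta, type):
    """Same metaclass as in config/singleton.py, repeated so the snippet runs standalone."""

    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


class Settings(metaclass=Singleton):
    def __init__(self, name: str = "default"):
        self.name = name


a = Settings("first")
b = Settings("second")   # construction is skipped; the cached instance comes back
print(a is b, b.name)    # True first
```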
docker-compose.yml ADDED
@@ -0,0 +1,9 @@
1
+ version: '3'
2
+ services:
3
+ gpt-researcher:
4
+ image: assafelovic/gpt-researcher
5
+ build: ./
6
+ environment:
7
+ OPENAI_API_KEY: ${OPENAI_API_KEY}
8
+ ports:
9
+ - 8000:8000
js/overlay.js ADDED
@@ -0,0 +1,29 @@
1
+ const overlay = document.createElement('div');
2
+ Object.assign(overlay.style, {
3
+ position: 'fixed',
4
+ zIndex: 999999,
5
+ top: 0,
6
+ left: 0,
7
+ width: '100%',
8
+ height: '100%',
9
+ background: 'rgba(0, 0, 0, 0.7)',
10
+ color: '#fff',
11
+ fontSize: '24px',
12
+ fontWeight: 'bold',
13
+ display: 'flex',
14
+ justifyContent: 'center',
15
+ alignItems: 'center',
16
+ });
17
+ const textContent = document.createElement('div');
18
+ Object.assign(textContent.style, {
19
+ textAlign: 'center',
20
+ });
21
+ textContent.textContent = 'Tavily AI: Analyzing Page';
22
+ overlay.appendChild(textContent);
23
+ document.body.append(overlay);
24
+ document.body.style.overflow = 'hidden';
25
+ let dotCount = 0;
26
+ setInterval(() => {
27
+ textContent.textContent = 'Tavily AI: Analyzing Page' + '.'.repeat(dotCount);
28
+ dotCount = (dotCount + 1) % 4;
29
+ }, 1000);
main.py ADDED
@@ -0,0 +1,71 @@
1
+ from fastapi import FastAPI, Request, WebSocket, WebSocketDisconnect
2
+ from fastapi.staticfiles import StaticFiles
3
+ from fastapi.templating import Jinja2Templates
4
+ from pydantic import BaseModel
5
+ import json
6
+ import os
7
+
8
+ from agent.llm_utils import choose_agent
9
+ from agent.run import WebSocketManager
10
+
11
+
12
+ class ResearchRequest(BaseModel):
13
+ task: str
14
+ report_type: str
15
+ agent: str
16
+
17
+
18
+
19
+ app = FastAPI()
20
+ app.mount("/site", StaticFiles(directory="client"), name="site")
21
+ app.mount("/static", StaticFiles(directory="client/static"), name="static")
22
+ # Create and mount the outputs directory at startup so generated reports can be served
23
+ @app.on_event("startup")
24
+ def startup_event():
25
+ if not os.path.isdir("outputs"):
26
+ os.makedirs("outputs")
27
+ app.mount("/outputs", StaticFiles(directory="outputs"), name="outputs")
28
+
29
+ templates = Jinja2Templates(directory="client")
30
+
31
+ manager = WebSocketManager()
32
+
33
+
34
+ @app.get("/")
35
+ async def read_root(request: Request):
36
+ return templates.TemplateResponse('index.html', {"request": request, "report": None})
37
+
38
+
39
+ @app.websocket("/ws")
40
+ async def websocket_endpoint(websocket: WebSocket):
41
+ await manager.connect(websocket)
42
+ try:
43
+ while True:
44
+ data = await websocket.receive_text()
45
+ if data.startswith("start"):
46
+ json_data = json.loads(data[6:])
47
+ task = json_data.get("task")
48
+ report_type = json_data.get("report_type")
49
+ agent = json_data.get("agent")
50
+ # Temporary: lets manually selected agents be used alongside the auto-generated one; will be removed once agent selection is fully automatic
51
+ if agent == "Auto Agent":
52
+ agent_dict = choose_agent(task)
53
+ agent = agent_dict.get("agent")
54
+ agent_role_prompt = agent_dict.get("agent_role_prompt")
55
+ else:
56
+ agent_role_prompt = None
57
+
58
+ await websocket.send_json({"type": "logs", "output": f"Initiated an Agent: {agent}"})
59
+ if task and report_type and agent:
60
+ await manager.start_streaming(task, report_type, agent, agent_role_prompt, websocket)
61
+ else:
62
+ print("Error: not enough parameters provided.")
63
+
64
+ except WebSocketDisconnect:
65
+ await manager.disconnect(websocket)
66
+
67
+
68
+ if __name__ == "__main__":
69
+ import uvicorn
70
+
71
+ uvicorn.run(app, host="0.0.0.0", port=8000)
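The browser client in `client/scripts.js` is the intended consumer of this endpoint, but the same `start {json}` protocol can be exercised from a script. A rough sketch using the third-party `websockets` package (an assumed dependency, not in this repo's requirements); the `report_type` value is also an assumption, since the actual `<select>` options live in `client/index.html`.

```python
import asyncio
import json

import websockets  # assumed extra dependency: pip install websockets


async def run_research(task: str) -> None:
    async with websockets.connect("ws://localhost:8000/ws") as ws:
        payload = {
            "task": task,
            "report_type": "research_report",   # assumed option value from the UI select
            "agent": "Auto Agent",              # triggers choose_agent() on the server
        }
        await ws.send(f"start {json.dumps(payload)}")

        while True:
            message = json.loads(await ws.recv())
            if message["type"] == "logs":
                print("log:", message["output"])
            elif message["type"] == "report":
                print(message["output"], end="")          # streamed markdown chunks
            elif message["type"] == "path":
                print("\nreport saved at:", message["output"])
                break


if __name__ == "__main__":
    asyncio.run(run_research("What is chain-of-thought prompting?"))
```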
outputs/540826e12734403b01e368d80fefc9cae0571027/research-Papers on understanding learning mechanisms through chain of thought prompting.txt ADDED
@@ -0,0 +1,4 @@
1
+ Information gathered from url https://arxiv.org/abs/2212.10001: The text does not provide specific information about papers on understanding learning mechanisms through chain of thought prompting. It primarily focuses on a paper titled "Towards Understanding Chain-of-Thought Prompting: An Empirical Study of What Matters" in the field of Computer Science.
2
+ Information gathered from url https://arxiv.org/abs/2305.15408: The text does not provide any information regarding papers on understanding learning mechanisms through chain of thought prompting.
3
+ Information gathered from url https://aclanthology.org/2023.acl-long.153/: The text does not provide any information regarding papers on understanding learning mechanisms through chain of thought prompting.
4
+ Information gathered from url https://arxiv.org/pdf/2212.10001.pdf: Error: No text to summarize
outputs/540826e12734403b01e368d80fefc9cae0571027/research-Scientific articles on how thought promptings affect decision making - October 11, 2023.txt ADDED
@@ -0,0 +1,4 @@
1
+ Information gathered from url https://medicalxpress.com/news/2023-08-brain-choices-decision-making.html: The provided text does not mention any scientific articles specifically on how thought promptings affect decision making on October 11, 2023. Therefore, there is no factual information, numbers, stats, or any other specific details to summarize.
2
+ Information gathered from url https://www.frontiersin.org/articles/10.3389/fpsyg.2023.1129835/full: The given text does not provide any information about scientific articles specifically on how thought promptings affect decision making on October 11, 2023. However, it does include citations of various scientific articles that discuss topics related to cognitive biases, decision making, and human behavior.
3
+ Information gathered from url https://www.sciencedaily.com/releases/2022/10/221014135704.htm: The text does not provide any information about scientific articles on how thought promptings affect decision making on October 11, 2023.
4
+ Information gathered from url https://news.stanford.edu/report/2021/09/15/mindsets-clearing-lens-life/: The text does not provide any information about scientific articles on how thought promptings affect decision making on October 11, 2023.
outputs/540826e12734403b01e368d80fefc9cae0571027/research-The cognitive process behind chain of thought prompting.txt ADDED
@@ -0,0 +1,4 @@
1
+ Information gathered from url https://www.promptingguide.ai/techniques/cot: The cognitive process behind chain-of-thought (CoT) prompting involves enabling complex reasoning capabilities through intermediate reasoning steps. It allows for better results on complex tasks that require reasoning before responding. The authors claim that this is an emergent ability that arises with sufficiently large language models.
2
+ Information gathered from url https://deepgram.com/learn/chain-of-thought-prompting-guide: The cognitive process behind chain of thought (CoT) prompting involves encouraging large language models (LLMs) to break down complex thoughts into intermediate steps. CoT prompts provide a series of examples or steps to guide LLMs in reasoning and reaching a final answer. It allows LLMs to decompose complex problems similar to how humans break down complicated math or logic questions into smaller steps. CoT prompting has been shown to boost LLMs' performance in complex arithmetic, commonsense, and symbolic reasoning tasks. It has been tested on various math word problem benchmarks and has shown minimal benefits until LLMs reach around 100 billion parameters. CoT prompting has also achieved state-of-the-art performance on certain benchmarks, surpassing models specifically finetuned on math word problems.
3
+ Information gathered from url https://learnprompting.org/docs/intermediate/chain_of_thought: The cognitive process behind chain of thought prompting involves encouraging the language model to explain its reasoning. It improves the model's accuracy by showing it examples where the reasoning process is explained. It has been effective in tasks like arithmetic, commonsense, and symbolic reasoning. Models of around 100 billion parameters have achieved a 57% solve rate accuracy on the GSM8K task. Smaller models do not show the same performance gains.
4
+ Information gathered from url https://www.linkedin.com/pulse/chain-of-thought-prompting-practical-guide-chatgpt-paul-zimeras: The cognitive process behind chain of thought prompting involves breaking down complex problems into manageable steps. It includes problem identification, understanding the problem, generating solutions, evaluating and selecting solutions, executing the solution, evaluating the solution, and acknowledging limitations and future directions. The process aims to address the original problem statement with a well-grounded decision. The Chain-of-Thought Prompting methodology provides a framework to guide problem-solving and encourages iterative and adaptive problem-solving.
outputs/540826e12734403b01e368d80fefc9cae0571027/research_report.md ADDED
@@ -0,0 +1,38 @@
1
+ # Chain of Thought (CoT) Prompting: A Cognitive Process Unveiling Advanced Reasoning Capabilities in Large Language Models
2
+
3
+ ## Introduction
4
+
5
+ The realm of artificial intelligence has revolutionized scientific research and discovery, with one such revolution being unveiled with the cognitive process behind chain-of-thought (CoT) prompting. This cognitive process seeks to advance reasoning capabilities in large language models (LLMs) by endorsing the practice of breaking down complex thoughts into intermediate steps, hence aiding in problem-solving. CoT prompting has been able to significantly improve the results of complex tasks demanding reasoning prior to generating a response.
6
+
7
+ ## Cognitive Process behind CoT Prompting
8
+
9
+ CoT prompting operates by stimulating LLMs to dissect complex thoughts into smaller, manageable steps, thus fostering a more tailored approach to problem-solving (Promptingguide.ai, n.d.). It is implied that LLMs employ a strategy paralleling how humans resolve convoluted mathematical or logical inquiries by splitting them into lesser operations (Deepgram, n.d.). Simply put, CoT prompting is reminiscent of the way humans process information and reason.
10
+
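+ For illustration (an example of ours, not taken from the cited sources): rather than asking a model directly "Roger has 5 tennis balls and buys 2 cans of 3 balls each; how many balls does he have?", a CoT prompt first shows a worked answer such as "Roger started with 5 balls; 2 cans of 3 balls add 6; 5 + 6 = 11", so the model reproduces the intermediate steps before stating its own final answer.
+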
11
+ ## Impacts of CoT Prompting on LLMs' Performance
12
+
13
+ Testing of CoT prompting on various mathematical word problem benchmarks has revealed the tangible benefits of this prompting methodology, especially when it comes to LLMs with around 100 billion parameters (Deepgram, n.d.). These benefits, however, were found to be insignificant for smaller models, underlining the crucial role that model size plays with respect to the benefits accrued from CoT prompting (Learnprompting.org, n.d.).
14
+
15
+ Promisingly, CoT prompting managed to boost language models' performance in acute cognitive tasks such as arithmetic, commonsense, and symbolic reasoning tasks. Fostering advanced reasoning capabilities, one research study revealed that models encompassing around 100 billion parameters achieved a commendable solve rate accuracy of 57% on the GSM8K task (Learnprompting.org, n.d.). The leaps in performance attributed to CoT prompting are substantially supporting the proposition that LLMs with CoT prompting are becoming increasingly sophisticated in their problem-solving abilities and cognitive profoundness.
16
+
17
+ ## Advanced Characteristics of CoT Prompting
18
+
19
+ CoT prompting is heralded for its unique methodology, which consists of several stages similar to a human problem-solving procedure. This includes the identification and understanding of the problem, the generation of solutions, evaluation and selection of the optimal response, execution and assessment of the solution, and finally acknowledging limitations and outlining future directions (Zimeras, n.d.).
20
+
21
+ Evincing humanlike problem-solving tactics, the systematic and iterative approach employed by the Chain-of-Thought Prompting methodology significantly augments the potential for LLMs to address complex problems with well-grounded decisions, therefore enhancing LLMs' ability to dissect intricate problems efficiently and systematically.
22
+
23
+ ## Conclusion
24
+
25
+ Through examining and understanding the cognitive process behind CoT prompting, it is evident that it fosters LLMs' capacities to solve exceptionally complex problems that require a more sophisticated level of reasoning. Leveraging a quasi-human approach to problem-solving, CoT prompting exposes LLMs to a more systematic format of understanding and dissecting issues, thereby significantly boosting their potential for improved model performance.
26
+
27
+ That said, one cannot dismiss the primary limitation of applying CoT prompting: its advantages depend heavily on the size of the language model. While research on CoT prompting is still at an early stage, existing results substantiate the tangible benefits this cognitive process delivers in unlocking higher cognitive abilities in large language models.
28
+
29
+ ## References
30
+
31
+ Deepgram. (n.d.). Chain of thought prompting guide. https://deepgram.com/learn/chain-of-thought-prompting-guide.
32
+
33
+ Learnprompting.org. (n.d.). Intermediate/chain of thought. https://learnprompting.org/docs/intermediate/chain_of_thought.
34
+
35
+ Promptingguide.ai. (n.d.). Techniques/COT. https://www.promptingguide.ai/techniques/cot.
36
+
37
+ Zimeras, P. (n.d.). Chain of thought prompting: A practical guide [LinkedIn].
38
+ https://www.linkedin.com/pulse/chain-of-thought-prompting-practical-guide-chatgpt-paul-zimeras
outputs/540826e12734403b01e368d80fefc9cae0571027/research_report.pdf ADDED
Binary file (16.5 kB). View file
 
permchain_example/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Permchain x Researcher
2
+ A sample use of LangChain's autonomous agent framework, Permchain, together with GPT Researcher.
3
+
4
+ ## Use case
5
+ Permchain is a framework for building autonomous agents that can be used to automate tasks and communication between agents to complete complex tasks. This example uses Permchain to automate the process of finding and summarizing research reports on any given topic.
6
+
7
+ ## The Agent Team
8
+ The research team is made up of 3 agents:
9
+ - Researcher agent (gpt-researcher) - This agent is in charge of finding and summarizing relevant research papers.
10
+ - Editor agent - This agent is in charge of validating the correctness of the report given a set of criteria.
11
+ - Reviser agent - This agent is in charge of revising the report until it is satisfactory.
12
+
13
+ ## How it works
14
+ The research agent (gpt-researcher) is in charge of finding and summarizing relevant research papers. It does this by using the following process:
15
+ - Search for relevant research papers using a search engine
16
+ - Extract the relevant information from the research papers
17
+ - Summarize the information into a report
18
+ - Send the report to the editor agent for validation
19
+ - If the editor requests changes, send the report to the reviser agent for revision
20
+ - Repeat until the report is satisfactory
21
+
22
+ ## How to run
23
+ 1. Install required packages:
24
+ ```bash
25
+ pip install -r requirements.txt
26
+ ```
27
+ 2. Run the application:
28
+ ```bash
29
+ python test.py
30
+ ```
31
+
32
+ ## Usage
33
+ To change the research topic, edit the `query` variable in `test.py` to the desired topic.
permchain_example/editor_actors/editor.py ADDED
@@ -0,0 +1,51 @@
1
+ from langchain.chat_models import ChatOpenAI
2
+ from langchain.prompts import SystemMessagePromptTemplate
3
+ from config import Config
4
+
5
+ CFG = Config()
6
+
7
+ EDIT_TEMPLATE = """You are an editor. \
8
+ You have been tasked with editing the following draft, which was written by a non-expert. \
9
+ Please accept the draft if it is good enough to publish, or send it for revision, along with your notes to guide the revision. \
10
+ Things you should be checking for:
11
+
12
+ - This draft MUST fully answer the original question
13
+ - This draft MUST be written in APA format
14
+
15
+ If not all of the above criteria are met, you should send appropriate revision notes.
16
+ """
17
+
18
+
19
+ class EditorActor:
20
+ def __init__(self):
21
+ self.model = ChatOpenAI(model=CFG.smart_llm_model)
22
+ self.prompt = SystemMessagePromptTemplate.from_template(EDIT_TEMPLATE) + "Draft:\n\n{draft}"
23
+ self.functions = [
24
+ {
25
+ "name": "revise",
26
+ "description": "Sends the draft for revision",
27
+ "parameters": {
28
+ "type": "object",
29
+ "properties": {
30
+ "notes": {
31
+ "type": "string",
32
+ "description": "The editor's notes to guide the revision.",
33
+ },
34
+ },
35
+ },
36
+ },
37
+ {
38
+ "name": "accept",
39
+ "description": "Accepts the draft",
40
+ "parameters": {
41
+ "type": "object",
42
+ "properties": {"ready": {"const": True}},
43
+ },
44
+ },
45
+ ]
46
+
47
+ @property
48
+ def runnable(self):
49
+ return (
50
+ self.prompt | self.model.bind(functions=self.functions)
51
+ )
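A sketch of exercising `EditorActor` on its own, outside the pub/sub wiring in `research_team.py`. It assumes `OPENAI_API_KEY` is set and that the installed LangChain version supports the prompt-composition and `.bind(functions=...)` style used above; the draft text is made up.

```python
# Hypothetical standalone call; not part of the upload.
from permchain_example.editor_actors.editor import EditorActor

editor = EditorActor()

draft = "Chain-of-thought prompting asks a model to spell out intermediate reasoning steps."
result = editor.runnable.invoke({"draft": draft})

# The model answers with a "revise" or "accept" function call, which is what
# OpenAIFunctionsRouter dispatches on in research_team.py.
print(result.additional_kwargs.get("function_call"))
```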
permchain_example/research_team.py ADDED
@@ -0,0 +1,75 @@
1
+ from operator import itemgetter
2
+ from langchain.runnables.openai_functions import OpenAIFunctionsRouter
3
+
4
+ from permchain.connection_inmemory import InMemoryPubSubConnection
5
+ from permchain.pubsub import PubSub
6
+ from permchain.topic import Topic
7
+
8
+ '''
9
+ This is the research team.
10
+ It is a group of autonomous agents that work together to answer a given question
11
+ using a comprehensive research process that includes:
12
+ - Searching for relevant information across multiple sources
13
+ - Extracting relevant information
14
+ - Writing a well structured report
15
+ - Validating the report
16
+ - Revising the report
17
+ - Repeat until the report is satisfactory
18
+ '''
19
+ class ResearchTeam:
20
+ def __init__(self, research_actor, editor_actor, reviser_actor):
21
+ self.research_actor_instance = research_actor
22
+ self.editor_actor_instance = editor_actor
23
+ self.revise_actor_instance = reviser_actor
24
+
25
+ def run(self, query):
26
+ # create topics
27
+ editor_inbox = Topic("editor_inbox")
28
+ reviser_inbox = Topic("reviser_inbox")
29
+
30
+ research_chain = (
31
+ # Listed in inputs
32
+ Topic.IN.subscribe()
33
+ | {"draft": lambda x: self.research_actor_instance.run(x["question"])}
34
+ # The draft always goes to the editor inbox
35
+ | editor_inbox.publish()
36
+ )
37
+
38
+ editor_chain = (
39
+ # Listen for events in the editor_inbox
40
+ editor_inbox.subscribe()
41
+ | self.editor_actor_instance.runnable
42
+ # Depending on the output, different things should happen
43
+ | OpenAIFunctionsRouter({
44
+ # If revise is chosen, we send a push to the critique_inbox
45
+ "revise": (
46
+ {
47
+ "notes": itemgetter("notes"),
48
+ "draft": editor_inbox.current() | itemgetter("draft"),
49
+ "question": Topic.IN.current() | itemgetter("question"),
50
+ }
51
+ | reviser_inbox.publish()
52
+ ),
53
+ # If accepted, then we return
54
+ "accept": editor_inbox.current() | Topic.OUT.publish(),
55
+ })
56
+ )
57
+
58
+ reviser_chain = (
59
+ # Listen for events in the reviser's inbox
60
+ reviser_inbox.subscribe()
61
+ | self.revise_actor_instance.runnable
62
+ # Publish to the editor inbox
63
+ | editor_inbox.publish()
64
+ )
65
+
66
+ web_researcher = PubSub(
67
+ research_chain,
68
+ editor_chain,
69
+ reviser_chain,
70
+ connection=InMemoryPubSubConnection(),
71
+ )
72
+
73
+ res = web_researcher.invoke({"question": query})
74
+ print(res)
75
+ return res["draft"]
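For orientation, a rough sketch of how the team might be assembled, in the spirit of the `test.py` the README refers to (not included in this view). The placeholder actors are illustrative: `ResearchTeam` only needs a research actor exposing `.run(question)` and editor/reviser actors exposing `.runnable`, as used by the chains above.

```python
# Hypothetical wiring; the actor stand-ins are placeholders, not real repo modules.
from permchain_example.editor_actors.editor import EditorActor
from permchain_example.research_team import ResearchTeam

research_actor = ...   # e.g. a Researcher from researcher.py, or a gpt-researcher wrapper
reviser_actor = ...    # an actor analogous to EditorActor that rewrites a draft from notes

team = ResearchTeam(research_actor, EditorActor(), reviser_actor)
final_draft = team.run("How does chain-of-thought prompting affect LLM reasoning?")
print(final_draft)
```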
permchain_example/researcher.py ADDED
@@ -0,0 +1,36 @@
1
+ from permchain.connection_inmemory import InMemoryPubSubConnection
2
+ from permchain.pubsub import PubSub
3
+ from permchain.topic import Topic
4
+
5
+
6
+ class Researcher:
7
+ def __init__(self, search_actor, writer_actor):
8
+ self.search_actor_instance = search_actor
9
+ self.writer_actor_instance = writer_actor
10
+
11
+ def run(self, query):
12
+ # The research inbox
13
+ research_inbox = Topic("research")
14
+ search_actor = (
15
+ Topic.IN.subscribe()
16
+ | {
17
+ "query": lambda x: x,
18
+ "results": self.search_actor_instance.runnable
19
+ }
20
+ | research_inbox.publish()
21
+ )
22
+
23
+ write_actor = (
24
+ research_inbox.subscribe()
25
+ | self.writer_actor_instance.runnable
26
+ | Topic.OUT.publish()
27
+ )
28
+
29
+ researcher = PubSub(
30
+ search_actor,
31
+ write_actor,
32
+ connection=InMemoryPubSubConnection(),
33
+ )
34
+
35
+ res = researcher.invoke(query)
36
+ return res["answer"]