Spaces:
Running
Running
File size: 5,576 Bytes
372531f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
"""
Hi! The following test cases cover the new `complement_source_urls` parameter and the fix for a functional error with `source_urls` in the GPTResearcher class.
The `source_urls` parameter was being reset on every call to the `conduct_research` function, causing GPTR to forget the given links. That has now been fixed, and a new parameter has been introduced.
This parameter, named `complement_source_urls`, allows GPTR to research sources other than those provided via `source_urls` when it is set to True.
Default is False, i.e., no additional research will be conducted on newer sources.
"""
## Notes:
## Please uncomment the test case you want to run and comment out the rest.
## Thanks!
#### Test case 1 (original test case as control from https://docs.gptr.dev/docs/gpt-researcher/tailored-research)
from gpt_researcher.agent import GPTResearcher # Ensure this path is correct
import asyncio
import logging
from typing import List, Dict, Any
from src.logs_handler import CustomLogsHandler # Update import
async def get_report(query: str, report_type: str, sources: list) -> "tuple[str, GPTResearcher]":
    """Research ``query`` using the given source URLs and write a report.

    Args:
        query: The research question handed to ``GPTResearcher``.
        report_type: Report type identifier passed through to ``GPTResearcher``.
        sources: URLs forwarded to ``GPTResearcher`` as ``source_urls``.

    Returns:
        A ``(report, researcher)`` pair: the value produced by
        ``researcher.write_report()`` and the researcher instance itself, so
        the caller can inspect it (e.g. its research context) afterwards.
    """
    # Log handler bound to this query; handed to the researcher as its websocket.
    custom_logs_handler = CustomLogsHandler(query=query)

    researcher = GPTResearcher(
        query=query,
        report_type=report_type,
        # `sources` used to be accepted but never forwarded, so the researcher
        # never saw the URLs this test case is meant to exercise.
        source_urls=sources,
        # False: per the module docstring, no research beyond `source_urls`.
        complement_source_urls=False,
        websocket=custom_logs_handler,
    )

    await researcher.conduct_research()
    report = await researcher.write_report()
    return report, researcher
if __name__ == "__main__":
    # Control scenario: the query is related to the content of the source pages.
    query = "Write an analysis on paul graham"
    report_type = "research_report"
    sources = [
        "https://www.paulgraham.com/when.html",
        "https://www.paulgraham.com/noob.html",
    ]

    report, researcher = asyncio.run(
        get_report(query=query, report_type=report_type, sources=sources)
    )
    print(report)

    # Expected: a non-zero length, because the query is related to the page
    # contents and relevant context should therefore be present.
    print(f"\nLength of the context = {len(researcher.get_research_context())}")
#### Test case 2 (Illustrating the problem, i.e., source_urls are not scoured. Hence, no relevant context)
# from gpt_researcher.agent import GPTResearcher # Ensure this path is correct
# import asyncio
# async def get_report(query: str, report_type: str, sources: list) -> str:
# researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources)
# await researcher.conduct_research()
# report = await researcher.write_report()
# return report, researcher
# if __name__ == "__main__":
# query = "What is Microsoft's business model?"
# report_type = "research_report"
# sources = ["https://www.apple.com", "https://en.wikipedia.org/wiki/Olympic_Games"] # query is UNRELATED.
# report, researcher = asyncio.run(get_report(query, report_type, sources))
# print(report)
# print(f"\nLength of the context = {len(researcher.get_research_context())}") # Must say 0 (zero) value because the query is UNRELATED to the contents of the pages, so there will be NO relevant context present
#### Test case 3 (Suggested solution - complement_source_urls parameter allows GPTR to scour more of the web and not restrict to source_urls)
# from gpt_researcher.agent import GPTResearcher # Ensure this path is correct
# import asyncio
# async def get_report(query: str, report_type: str, sources: list) -> str:
# researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources, complement_source_urls=True)
# await researcher.conduct_research()
# report = await researcher.write_report()
# return report, researcher
# if __name__ == "__main__":
# query = "What is Microsoft's business model?"
# report_type = "research_report"
# sources = ["https://www.apple.com", "https://en.wikipedia.org/wiki/Olympic_Games"] # query is UNRELATED
# report, researcher = asyncio.run(get_report(query, report_type, sources))
# print(report)
# print(f"\nLength of the context = {len(researcher.get_research_context())}") # Must say Non-zero value because the query is UNRELATED to the contents of the page, but the complement_source_urls is set which should make gptr do default web search to gather contexts
#### Test case 4 (Furthermore, GPTR will create more context in addition to source_urls if the complement_source_urls parameter is set, allowing for a larger research scope)
# from gpt_researcher.agent import GPTResearcher # Ensure this path is correct
# import asyncio
# async def get_report(query: str, report_type: str, sources: list) -> str:
# researcher = GPTResearcher(query=query, report_type=report_type, source_urls=sources, complement_source_urls=True)
# await researcher.conduct_research()
# report = await researcher.write_report()
# return report, researcher
# if __name__ == "__main__":
# query = "What are the latest advancements in AI?"
# report_type = "research_report"
# sources = ["https://en.wikipedia.org/wiki/Artificial_intelligence", "https://www.ibm.com/watson/ai"] # query is related
# report, researcher = asyncio.run(get_report(query, report_type, sources))
# print(report)
# print(f"\nLength of the context = {len(researcher.get_research_context())}") # Must say Non-zero value because the query is related to the contents of the page, and additionally the complement_source_urls is set which should make gptr do default web search to gather more contexts!
|