feat: simple scrape with css_selector

#10
Files changed (2)
  1. app.py +4 -4
  2. stealth_scrape_tool.py +2 -2
app.py CHANGED
@@ -5,7 +5,7 @@ import base64
 from io import BytesIO
 from PIL import Image
 from crewai import Agent, Task, Crew, Process, LLM
-from crewai_tools import ScrapeWebsiteTool
+from crewai_tools import ScrapeElementFromWebsiteTool
 from dotenv import load_dotenv
 from stealth_scrape_tool import StealthScrapeTool
 from image_generator_tool import GenerateImageTool
@@ -20,7 +20,7 @@ class SocialMediaCrew:
         self.natura_api_token = natura_api_token
         self.openai_base_url = openai_base_url
         self.openai_model_name = openai_model_name
-        self.scrape_tool = ScrapeWebsiteTool()
+        self.scrape_tool = ScrapeElementFromWebsiteTool()
         self.calculate_discounted_price_tool = CalculateDiscountedPriceTool()
         self.calculate_discount_value_tool = CalculateDiscountValueTool()
         self.image_generator_tool = GenerateImageTool()
@@ -89,7 +89,7 @@ class SocialMediaCrew:
         return merchant, css_selector, short_url
 
     def _create_analyze_product_task(self, product_url: str, css_selector: str, main_cupom_discount_percentage: float, short_url: str, original_price: float, discounted_price: float) -> Task:
-        task_description = (f"1. Scrape the content of the URL: {product_url} using the 'scrape_tool'.\n"
+        task_description = (f"1. Scrape the content of the URL: {product_url} using the 'scrape_tool' with param `css_element` as `{css_selector}`.\n"
                             "2. Extract the product name, key characteristics, and any other relevant DISCOUNT available.\n")
 
         if original_price is not None and original_price > 0 and discounted_price is not None and discounted_price > 0:
@@ -267,7 +267,7 @@ with gr.Blocks() as demo:
     openai_key_input = gr.Textbox(label="OPENAI_API_KEY", type="password", value=os.getenv("OPENAI_API_KEY", ""))
    natura_token_input = gr.Textbox(label="NATURA_API_TOKEN", type="password", value=os.getenv("NATURA_API_TOKEN", ""))
    openai_base_url_input = gr.Textbox(label="OPENAI_BASE_URL", value=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"))
-    openai_model_name_input = gr.Textbox(label="OPENAI_MODEL_NAME", value=os.getenv("OPENAI_MODEL_NAME", "gpt-4.1"))
+    openai_model_name_input = gr.Textbox(label="OPENAI_MODEL_NAME", value=os.getenv("OPENAI_MODEL_NAME", "gpt-4.1-mini"))
 
    clean_env_vars()
    # No save button needed as keys are passed directly
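The change above swaps the whole-page `ScrapeWebsiteTool` for the selector-scoped `ScrapeElementFromWebsiteTool`, and the task prompt now tells the agent to pass the configured `css_selector` as the tool's `css_element` parameter. A minimal standalone sketch of that usage, assuming the tool accepts `website_url` and `css_element` inputs as the updated task description implies (the URL and selector below are placeholders, not values from this PR):

```python
# Standalone sketch, not part of this PR: exercising the selector-scoped scrape
# directly. `website_url` and `css_element` are assumed to be the tool's input
# fields, as referenced in the updated task description; values are placeholders.
from crewai_tools import ScrapeElementFromWebsiteTool

scrape_tool = ScrapeElementFromWebsiteTool()

# Return only the text of elements matching the CSS selector, instead of the
# whole rendered page, keeping the context handed to the agent small.
result = scrape_tool.run(
    website_url="https://example.com/product/123",  # placeholder URL
    css_element="div.product-details",              # placeholder selector
)
print(result)
```

In the crew itself the agent supplies `css_element` at call time, since `_create_analyze_product_task` now names the selector explicitly in the prompt.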
stealth_scrape_tool.py CHANGED
@@ -17,7 +17,7 @@ class StealthScrapeTool(BaseTool):
         print(f"StealthScrapeTool: Starting scraping for {website_url}...")
         print(f"StealthScrapeTool: Navigating to {website_url}")
         await page.goto(website_url, timeout=120000)
-        await asyncio.sleep(5)
+        await asyncio.sleep(1)
 
         # Scroll to the bottom of the page repeatedly to load all dynamic content
         print("StealthScrapeTool: Scrolling through the page to load dynamic content...")
@@ -33,7 +33,7 @@ class StealthScrapeTool(BaseTool):
             await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
             print("StealthScrapeTool: Scrolled. Waiting for content to load...")
 
-            await asyncio.sleep(5)
+            await asyncio.sleep(1)
 
             print("StealthScrapeTool: Getting new scrollHeight...")
             new_height = await page.evaluate("document.body.scrollHeight")
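Both hunks shorten the waits in the dynamic-content scroll loop from 5 s to 1 s. For context, here is a plain-Playwright sketch of the loop these lines sit in, reconstructed from the surrounding diff context rather than copied from `stealth_scrape_tool.py` (the real tool wraps this in a CrewAI `BaseTool` and its own stealth browser setup; Chromium is an assumption):

```python
# Sketch of the scroll-to-bottom loop around the changed lines (reconstructed,
# not a verbatim copy of StealthScrapeTool). Assumes plain Playwright + Chromium.
import asyncio
from playwright.async_api import async_playwright

async def scrape_full_page(website_url: str) -> str:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()

        await page.goto(website_url, timeout=120000)
        await asyncio.sleep(1)  # settle time after navigation (was 5 s)

        last_height = await page.evaluate("document.body.scrollHeight")
        while True:
            # Scroll to the bottom and give lazy-loaded content a moment to appear.
            await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            await asyncio.sleep(1)  # wait between scrolls (was 5 s)

            new_height = await page.evaluate("document.body.scrollHeight")
            if new_height == last_height:
                break  # page stopped growing; dynamic content is loaded
            last_height = new_height

        content = await page.content()
        await browser.close()
        return content
```

With the shorter sleeps, total scrape time drops roughly in proportion to the number of scroll iterations; if a slow site ends up with truncated content, this wait is the first knob to turn back up.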