Spaces:
Running
Running
feat: simple scrape with css_selector
#10 by ibombonato - opened
- app.py +4 -4
- stealth_scrape_tool.py +2 -2
app.py
CHANGED
@@ -5,7 +5,7 @@ import base64
|
|
5 |
from io import BytesIO
|
6 |
from PIL import Image
|
7 |
from crewai import Agent, Task, Crew, Process, LLM
|
8 |
-
from crewai_tools import
|
9 |
from dotenv import load_dotenv
|
10 |
from stealth_scrape_tool import StealthScrapeTool
|
11 |
from image_generator_tool import GenerateImageTool
|
@@ -20,7 +20,7 @@ class SocialMediaCrew:
|
|
20 |
self.natura_api_token = natura_api_token
|
21 |
self.openai_base_url = openai_base_url
|
22 |
self.openai_model_name = openai_model_name
|
23 |
-
self.scrape_tool =
|
24 |
self.calculate_discounted_price_tool = CalculateDiscountedPriceTool()
|
25 |
self.calculate_discount_value_tool = CalculateDiscountValueTool()
|
26 |
self.image_generator_tool = GenerateImageTool()
|
@@ -89,7 +89,7 @@ class SocialMediaCrew:
|
|
89 |
return merchant, css_selector, short_url
|
90 |
|
91 |
def _create_analyze_product_task(self, product_url: str, css_selector: str, main_cupom_discount_percentage: float, short_url: str, original_price: float, discounted_price: float) -> Task:
|
92 |
-
task_description = (f"1. Scrape the content of the URL: {product_url} using the 'scrape_tool'
|
93 |
"2. Extract the product name, key characteristics, and any other relevant DISCOUNT available.\n")
|
94 |
|
95 |
if original_price is not None and original_price > 0 and discounted_price is not None and discounted_price > 0:
|
@@ -267,7 +267,7 @@ with gr.Blocks() as demo:
|
|
267 |
openai_key_input = gr.Textbox(label="OPENAI_API_KEY", type="password", value=os.getenv("OPENAI_API_KEY", ""))
|
268 |
natura_token_input = gr.Textbox(label="NATURA_API_TOKEN", type="password", value=os.getenv("NATURA_API_TOKEN", ""))
|
269 |
openai_base_url_input = gr.Textbox(label="OPENAI_BASE_URL", value=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"))
|
270 |
-
openai_model_name_input = gr.Textbox(label="OPENAI_MODEL_NAME", value=os.getenv("OPENAI_MODEL_NAME", "gpt-4.1"))
|
271 |
|
272 |
clean_env_vars()
|
273 |
# No save button needed as keys are passed directly
|
|
|
5 |
from io import BytesIO
|
6 |
from PIL import Image
|
7 |
from crewai import Agent, Task, Crew, Process, LLM
|
8 |
+
from crewai_tools import ScrapeElementFromWebsiteTool
|
9 |
from dotenv import load_dotenv
|
10 |
from stealth_scrape_tool import StealthScrapeTool
|
11 |
from image_generator_tool import GenerateImageTool
|
|
|
20 |
self.natura_api_token = natura_api_token
|
21 |
self.openai_base_url = openai_base_url
|
22 |
self.openai_model_name = openai_model_name
|
23 |
+
self.scrape_tool = ScrapeElementFromWebsiteTool()
|
24 |
self.calculate_discounted_price_tool = CalculateDiscountedPriceTool()
|
25 |
self.calculate_discount_value_tool = CalculateDiscountValueTool()
|
26 |
self.image_generator_tool = GenerateImageTool()
|
|
|
89 |
return merchant, css_selector, short_url
|
90 |
|
91 |
def _create_analyze_product_task(self, product_url: str, css_selector: str, main_cupom_discount_percentage: float, short_url: str, original_price: float, discounted_price: float) -> Task:
|
92 |
+
task_description = (f"1. Scrape the content of the URL: {product_url} using the 'scrape_tool' with param `css_element` as `{css_selector}`.\n"
|
93 |
"2. Extract the product name, key characteristics, and any other relevant DISCOUNT available.\n")
|
94 |
|
95 |
if original_price is not None and original_price > 0 and discounted_price is not None and discounted_price > 0:
|
|
|
267 |
openai_key_input = gr.Textbox(label="OPENAI_API_KEY", type="password", value=os.getenv("OPENAI_API_KEY", ""))
|
268 |
natura_token_input = gr.Textbox(label="NATURA_API_TOKEN", type="password", value=os.getenv("NATURA_API_TOKEN", ""))
|
269 |
openai_base_url_input = gr.Textbox(label="OPENAI_BASE_URL", value=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"))
|
270 |
+
openai_model_name_input = gr.Textbox(label="OPENAI_MODEL_NAME", value=os.getenv("OPENAI_MODEL_NAME", "gpt-4.1-mini"))
|
271 |
|
272 |
clean_env_vars()
|
273 |
# No save button needed as keys are passed directly
|
stealth_scrape_tool.py
CHANGED
@@ -17,7 +17,7 @@ class StealthScrapeTool(BaseTool):
|
|
17 |
print(f"StealthScrapeTool: Starting scraping for {website_url}...")
|
18 |
print(f"StealthScrapeTool: Navigating to {website_url}")
|
19 |
await page.goto(website_url, timeout=120000)
|
20 |
-
await asyncio.sleep(
|
21 |
|
22 |
# Scroll to the bottom of the page repeatedly to load all dynamic content
|
23 |
print("StealthScrapeTool: Scrolling through the page to load dynamic content...")
|
@@ -33,7 +33,7 @@ class StealthScrapeTool(BaseTool):
|
|
33 |
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
34 |
print("StealthScrapeTool: Scrolled. Waiting for content to load...")
|
35 |
|
36 |
-
await asyncio.sleep(
|
37 |
|
38 |
print("StealthScrapeTool: Getting new scrollHeight...")
|
39 |
new_height = await page.evaluate("document.body.scrollHeight")
|
|
|
17 |
print(f"StealthScrapeTool: Starting scraping for {website_url}...")
|
18 |
print(f"StealthScrapeTool: Navigating to {website_url}")
|
19 |
await page.goto(website_url, timeout=120000)
|
20 |
+
await asyncio.sleep(1)
|
21 |
|
22 |
# Scroll to the bottom of the page repeatedly to load all dynamic content
|
23 |
print("StealthScrapeTool: Scrolling through the page to load dynamic content...")
|
|
|
33 |
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
34 |
print("StealthScrapeTool: Scrolled. Waiting for content to load...")
|
35 |
|
36 |
+
await asyncio.sleep(1)
|
37 |
|
38 |
print("StealthScrapeTool: Getting new scrollHeight...")
|
39 |
new_height = await page.evaluate("document.body.scrollHeight")
|