Docker and Fragrantica Support

#1
Files changed (12) hide show
  1. .dockerignore +8 -0
  2. .gitignore +7 -0
  3. Dockerfile +51 -0
  4. README.md +29 -23
  5. __init__.py +0 -0
  6. app.py +30 -10
  7. fragrantica_crew.py +109 -0
  8. pyproject.toml +15 -0
  9. requirements.txt +3 -1
  10. social_media_crew.py +56 -0
  11. stealth_scrape_tool.py +37 -0
  12. uv.lock +0 -0
.dockerignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ tmp/
2
+ .ruff_cache/
3
+ __pycache__/
4
+ .venv/
5
+ .vscode/
6
+ data/
7
+ .env
8
+ local/
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ tmp/
2
+ .ruff_cache/
3
+ __pycache__/
4
+ .venv/
5
+ .vscode/
6
+ data/
7
+ local/
Dockerfile ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12-slim
2
+
3
+ # Install uv globally
4
+ RUN pip install uv
5
+
6
+ # Update system packages and install Playwright system dependencies (as root)
7
+ RUN apt-get update && apt-get install -y \
8
+ libnss3 \
9
+ libatk-bridge2.0-0 \
10
+ libdrm2 \
11
+ libxkbcommon0 \
12
+ libxcomposite1 \
13
+ libxdamage1 \
14
+ libxrandr2 \
15
+ libgbm1 \
16
+ libxss1 libasound2 libcups2 libxfixes3 libcairo2 libpango-1.0-0 --no-install-recommends && rm -rf /var/lib/apt/lists/*
17
+
18
+ # Create a non-root user
19
+ RUN adduser --disabled-password --gecos '' appuser
20
+
21
+ # Create /app directory and set ownership
22
+ RUN mkdir /app && chown appuser:appuser /app
23
+
24
+ WORKDIR /app
25
+
26
+ # Switch to the non-root user
27
+ USER appuser
28
+
29
+ # Create virtual environment as appuser
30
+ RUN uv venv .venv
31
+ # Use the virtual environment automatically
32
+ ENV VIRTUAL_ENV=/app/.venv
33
+ ENV PATH="/app/.venv/bin:$PATH"
34
+ ENV PYTHONUNBUFFERED=1
35
+
36
+ # Copy dependency files and install dependencies as appuser
37
+ COPY pyproject.toml uv.lock ./
38
+ RUN uv sync
39
+
40
+ # Install Playwright browser binaries as appuser
41
+ RUN /app/.venv/bin/playwright install chromium
42
+
43
+ # Copy the rest of your application code as appuser
44
+ COPY . .
45
+
46
+ # Expose the port Gradio runs on
47
+ EXPOSE 7860
48
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
49
+
50
+ # Command to run the Gradio application
51
+ CMD ["uv", "run", "--active", "app.py"]
README.md CHANGED
@@ -1,8 +1,7 @@
1
  ---
2
  title: nat-ad
3
- app_file: app.py
4
- sdk: gradio
5
- sdk_version: 5.38.0
6
  ---
7
  # Social Media Ads Creator
8
 
@@ -10,40 +9,47 @@ This project leverages AI agents to automatically generate social media ad copy
10
 
11
  ## How it Works
12
 
13
- The system uses a Gradio interface (`app.py`) to take product URLs and other parameters as input. Behind the scenes, a "crew" of AI agents, each with a specific role, processes this information:
14
 
15
- 1. **Product Analyst:** This agent scrapes a product URL to extract key information like the product name, features, price, and any available discounts. It also uses a tool to shorten the URL.
16
- 2. **Social Media Copywriter:** This agent takes the product information and crafts a compelling social media post in Portuguese, tailored for platforms like WhatsApp. The post includes a call to action, emojis, and the shortened URL.
 
 
 
 
 
17
 
18
  ## Setup and Usage
19
 
20
  1. **Prerequisites:**
21
- * Python 3.12 or higher
22
  * An OpenAI API key
23
  * A Natura API token (for the URL shortener)
24
 
25
- 2. **Installation:**
26
- * The dependencies are listed in the `pyproject.toml` file.
27
-
28
- 3. **Configuration:**
29
- * Create a `.env` file in the root directory.
30
- * Add your OpenAI API key and Natura API token to the `.env` file:
31
- ```
32
- OPENAI_API_KEY="your_openai_api_key"
33
- NATURA_API_TOKEN="your_natura_api_token"
34
  ```
35
-
36
- 4. **Execution:**
37
- * Run the `app.py` script to launch the Gradio application:
38
  ```bash
39
- u run app.py
40
  ```
41
- * Access the Gradio interface in your web browser at the address provided in the console (usually `http://127.0.0.1:7860`).
42
 
43
  ## Key Files
44
 
45
  * `app.py`: The Gradio application that provides the user interface.
46
- * `social_media_crew.py`: Defines the AI agents and their tasks.
 
 
47
  * `shortener_tool.py`: A custom tool for shortening URLs.
48
- * `.env`: The configuration file for API keys.
 
49
  * `pyproject.toml`: The project's metadata and dependencies.
 
 
 
 
 
 
 
1
  ---
2
  title: nat-ad
3
+ sdk: docker
4
+ app_port: 7860
 
5
  ---
6
  # Social Media Ads Creator
7
 
 
9
 
10
  ## How it Works
11
 
12
+ The system uses a Gradio interface (`app.py`) with two main tabs:
13
 
14
+ 1. **Social Media Ad Generator:** This tab takes product URLs and other parameters as input. Behind the scenes, a "crew" of AI agents, each with a specific role, processes this information:
15
+ * **Product Analyst:** This agent scrapes a product URL to extract key information like the product name, features, price, and any available discounts. It also uses a tool to shorten the URL.
16
+ * **Social Media Copywriter:** This agent takes the product information and crafts a compelling social media post in Portuguese, tailored for platforms like WhatsApp. The post includes a call to action, emojis, and the shortened URL.
17
+
18
+ 2. **Fragrantica Website Analyzer:** This new tab allows users to input a Fragrantica.com URL for a perfume. A dedicated "FragranticaCrew" analyzes the webpage using a stealthy web scraping tool (`StealthScrapeTool`) to bypass anti-bot measures. The crew then generates a comprehensive perfume analysis report.
19
+ * **Expert Perfume Analyst and Web Data Extractor:** This agent extracts detailed perfume information (notes, accords, longevity, sillage, similar fragrances, reviews) from the Fragrantica page.
20
+ * **Fragrance Expert Woman and Perfume Analysis Reporter:** This agent synthesizes the extracted data into a human-friendly report, including graded evaluations and personalized recommendations.
21
 
22
  ## Setup and Usage
23
 
24
  1. **Prerequisites:**
25
+ * Docker installed
26
  * An OpenAI API key
27
  * A Natura API token (for the URL shortener)
28
 
29
+ 2. **Installation & Execution (Docker):**
30
+ * Build the Docker image:
31
+ ```bash
32
+ docker build -t natura-ads .
 
 
 
 
 
33
  ```
34
+ * Run the Docker container, mapping port 7860 and passing API keys as environment variables:
 
 
35
  ```bash
36
+ docker run -p 7860:7860 -e OPENAI_API_KEY="your_openai_api_key" -e NATURA_API_TOKEN="your_natura_api_token" -e OPENAI_BASE_URL="your_openai_base_url" -e OPENAI_MODEL_NAME="your_openai_model_name" natura-ads
37
  ```
38
+ * Access the Gradio interface in your web browser at `http://localhost:7860`.
39
 
40
  ## Key Files
41
 
42
  * `app.py`: The Gradio application that provides the user interface.
43
+ * `social_media_crew.py`: Defines the AI agents and their tasks for social media ad generation.
44
+ * `fragrantica_crew.py`: Defines the AI agents and their tasks for Fragrantica website analysis.
45
+ * `stealth_scrape_tool.py`: A custom tool for stealthy web scraping using Playwright.
46
  * `shortener_tool.py`: A custom tool for shortening URLs.
47
+ * `Dockerfile`: Defines the Docker image for deploying the application.
48
+ * `.env`: The configuration file for API keys (used for local development, environment variables preferred for Docker).
49
  * `pyproject.toml`: The project's metadata and dependencies.
50
+
51
+
52
+ # Roadmap
53
+
54
+ - [x] Add support for any model/api key supported by LiteLLM.
55
+ - [x] Add Fragrantica support, where user will input a Fragrantica URL and the agent will extract and generate a Perfume Analysis report.
__init__.py ADDED
File without changes
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import gradio as gr
2
  import os
3
  import requests
4
- from dotenv import load_dotenv
5
-
6
- load_dotenv()
7
  from crewai import Agent, Task, Crew, Process, LLM
8
  from crewai_tools import ScrapeWebsiteTool
9
  from crewai.tools import BaseTool
 
 
 
10
 
11
  class ShortenerTool(BaseTool):
12
  name: str = "URL Shortener Tool"
@@ -27,7 +27,7 @@ class ShortenerTool(BaseTool):
27
  print(f"Warning: Error generating short URL: {e}. Returning original URL.")
28
  return original_url
29
  except ValueError:
30
- print(f"Warning: Invalid JSON response from shortener API. Returning original URL.")
31
  return original_url
32
 
33
  class CalculateDiscountedPriceTool(BaseTool):
@@ -105,9 +105,9 @@ class SocialMediaCrew:
105
  return "INVALID_URL"
106
 
107
  analyze_product_task = Task(
108
- description=(f"1. Scrape the content of the URL: {product_url} using the 'scrape_tool'.\n2. Identify and extract the original product price and the final discounted price if existing. IGNORE any price breakdowns like 'produto' or 'consultoria'.\n3. Extract the product name, key characteristics, and any other relevant DISCOUNT available.\n4. Use the 'Calculate Discounted Price Tool' with the extracted final best price and the provided discount percentage ({main_cupom_discount_percentage}) to get the CUPOM DISCOUNTED PRICE.\n5. Use the 'URL Shortener Tool' to generate a short URL for {product_url}. If the shortener tool returns an error, use the original URL.\n6. Provide all this information, including the product name, ORIGINAL PRICE (the primary price from step 2), CUPOM DISCOUNTED PRICE, and the generated short URL (or the original if the shortener failed). If any of this information cannot be extracted, you MUST return 'MISSING_PRODUCT_INFO'."),
109
  agent=self.product_analyst,
110
- expected_output="A concise summary of the product including its name, key features, unique selling points, ORIGINAL PRICE, CUPOM DISCOUNTED PRICE, and the SHORT SHAREABLE URL (or the original if the shortener failed), OR 'MISSING_PRODUCT_INFO' if essential product details are not found."
111
  )
112
 
113
  create_post_task = Task(
@@ -180,20 +180,40 @@ with gr.Blocks() as demo:
180
  cupom_1_input = gr.Textbox(label="Cupom 1 (e.g., AMIGO15)", placeholder="Enter first coupon code...")
181
  cupom_2_input = gr.Textbox(label="Cupom 2 (e.g., JULHOA)", placeholder="Enter second coupon code...")
182
  generate_button = gr.Button("Generate Ad")
183
- ad_output = gr.Markdown(label="Your Generated Ad")
184
 
 
 
 
 
 
 
185
  with gr.Tab("Settings"):
186
  gr.Markdown("### ⚙️ API Key Settings")
187
  gr.Markdown("Enter your API keys below. These will be used for the current session.")
188
  openai_key_input = gr.Textbox(label="OPENAI_API_KEY", type="password", value=os.getenv("OPENAI_API_KEY", ""))
189
  natura_token_input = gr.Textbox(label="NATURA_API_TOKEN", type="password", value=os.getenv("NATURA_API_TOKEN", ""))
190
  openai_base_url_input = gr.Textbox(label="OPENAI_BASE_URL", value=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"))
191
- openai_model_name_input = gr.Textbox(label="OPENAI_MODEL_NAME", value=os.getenv("OPENAI_MODEL_NAME", "gpt-4o-mini"))
192
 
193
  clean_env_vars()
194
  # No save button needed as keys are passed directly
195
  gr.Markdown("API keys are used directly from these fields when you click 'Generate Ad'. They are not saved persistently.")
196
 
197
  generate_button.click(generate_ad, inputs=[url_input, main_cupom_input, main_cupom_discount_percentage_input, cupom_1_input, cupom_2_input, openai_key_input, natura_token_input, openai_base_url_input, openai_model_name_input], outputs=ad_output)
198
-
199
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import os
3
  import requests
 
 
 
4
  from crewai import Agent, Task, Crew, Process, LLM
5
  from crewai_tools import ScrapeWebsiteTool
6
  from crewai.tools import BaseTool
7
+ from dotenv import load_dotenv
8
+
9
+ load_dotenv()
10
 
11
  class ShortenerTool(BaseTool):
12
  name: str = "URL Shortener Tool"
 
27
  print(f"Warning: Error generating short URL: {e}. Returning original URL.")
28
  return original_url
29
  except ValueError:
30
+ print("Warning: Invalid JSON response from shortener API. Returning original URL.")
31
  return original_url
32
 
33
  class CalculateDiscountedPriceTool(BaseTool):
 
105
  return "INVALID_URL"
106
 
107
  analyze_product_task = Task(
108
+ description=(f"1. Scrape the content of the URL: {product_url} using the 'scrape_tool'.\n2. Identify and extract the original product price and the final discounted price if existing. IGNORE any price breakdowns like 'produto' or 'consultoria'.\n3. Extract the product name, key characteristics, and any other relevant DISCOUNT available.\n4. Use the 'Calculate Discounted Price Tool' with the extracted final best price and the provided discount percentage ({main_cupom_discount_percentage}) to get the CUPOM DISCOUNTED PRICE.\n5. Use the 'URL Shortener Tool' to generate a short URL for {product_url}. If the shortener tool returns an error, use the original URL.\n6. Provide all this information, including the product name, ORIGINAL PRICE, DISCOUNTED PRICE (the one used as the input in the tool 'Calculate Discounted Price Tool'), 2) CUPOM DISCOUNTED PRICE, and the generated short URL (or the original if the shortener failed). If any of this information cannot be extracted, you MUST return 'MISSING_PRODUCT_INFO'."),
109
  agent=self.product_analyst,
110
+ expected_output="A concise summary of the product including its name, key features, unique selling points, ORIGINAL PRICE, DISCOUNTED PRICE (the one used as the input in the tool 'Calculate Discounted Price Tool'), CUPOM DISCOUNTED PRICE, and the SHORT SHAREABLE URL (or the original if the shortener failed), OR 'MISSING_PRODUCT_INFO' if essential product details are not found."
111
  )
112
 
113
  create_post_task = Task(
 
180
  cupom_1_input = gr.Textbox(label="Cupom 1 (e.g., AMIGO15)", placeholder="Enter first coupon code...")
181
  cupom_2_input = gr.Textbox(label="Cupom 2 (e.g., JULHOA)", placeholder="Enter second coupon code...")
182
  generate_button = gr.Button("Generate Ad")
183
+ ad_output = gr.Markdown(label="Your Generated Ad", show_copy_button=True)
184
 
185
+ with gr.Tab("Fragrantica"):
186
+ gr.Markdown("### 👃 Fragrantica Website Analyzer")
187
+ fragrantica_url_input = gr.Textbox(label="Fragrantica Product URL", placeholder="Enter Fragrantica product URL here...")
188
+ analyze_fragrantica_button = gr.Button("Analyze Fragrantica Product")
189
+ fragrantica_output = gr.Markdown(label="Fragrantica Analysis Report")
190
+
191
  with gr.Tab("Settings"):
192
  gr.Markdown("### ⚙️ API Key Settings")
193
  gr.Markdown("Enter your API keys below. These will be used for the current session.")
194
  openai_key_input = gr.Textbox(label="OPENAI_API_KEY", type="password", value=os.getenv("OPENAI_API_KEY", ""))
195
  natura_token_input = gr.Textbox(label="NATURA_API_TOKEN", type="password", value=os.getenv("NATURA_API_TOKEN", ""))
196
  openai_base_url_input = gr.Textbox(label="OPENAI_BASE_URL", value=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"))
197
+ openai_model_name_input = gr.Textbox(label="OPENAI_MODEL_NAME", value=os.getenv("OPENAI_MODEL_NAME", "gpt-4.1"))
198
 
199
  clean_env_vars()
200
  # No save button needed as keys are passed directly
201
  gr.Markdown("API keys are used directly from these fields when you click 'Generate Ad'. They are not saved persistently.")
202
 
203
  generate_button.click(generate_ad, inputs=[url_input, main_cupom_input, main_cupom_discount_percentage_input, cupom_1_input, cupom_2_input, openai_key_input, natura_token_input, openai_base_url_input, openai_model_name_input], outputs=ad_output)
204
+
205
+ # Placeholder for Fragrantica analysis function
206
+ def analyze_fragrantica_url(url, openai_api_key, natura_api_token, openai_base_url, openai_model_name):
207
+ if not openai_api_key or not openai_model_name or not openai_base_url:
208
+ return "Please configure your API keys in the settings section below."
209
+ from fragrantica_crew import FragranticaCrew
210
+ fragrantica_crew = FragranticaCrew(openai_api_key, openai_base_url, openai_model_name)
211
+ report = fragrantica_crew.kickoff(url=url)
212
+ if report == "SCRAPING_FAILED":
213
+ return "❌ Scraping failed. The website could not be accessed or parsed. Please check the URL or try again later."
214
+ return report.raw
215
+
216
+ analyze_fragrantica_button.click(analyze_fragrantica_url, inputs=[fragrantica_url_input, openai_key_input, natura_token_input, openai_base_url_input, openai_model_name_input], outputs=fragrantica_output)
217
+
218
+ if __name__ == "__main__":
219
+ demo.launch(server_name="0.0.0.0", server_port=7860)
fragrantica_crew.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Agent, Task, Crew, Process, LLM
2
+
3
+ from stealth_scrape_tool import StealthScrapeTool
4
+
5
+ class FragranticaCrew:
6
+ def __init__(self, openai_api_key: str, openai_base_url: str, openai_model_name: str):
7
+ self.openai_api_key = openai_api_key
8
+ self.openai_base_url = openai_base_url
9
+ self.openai_model_name = openai_model_name
10
+ self.scrape_tool = StealthScrapeTool()
11
+
12
+ llm = LLM(
13
+ api_key=self.openai_api_key,
14
+ model=self.openai_model_name,
15
+ base_url=self.openai_base_url
16
+ )
17
+
18
+ self.research_agent = Agent(
19
+ role='Expert Perfume Analyst and Web Data Extractor',
20
+ goal="Analyze the content of the provided URL, which leads to a perfume review page. Based on the page's content, including official descriptions and user reviews, you must extract the specified information and format it as a user friendly text.",
21
+ backstory=("As an expert in the world of fragrances and olfactory evaluator, you have a gift for dissecting complex perfume pages. You can read through hundreds of user reviews and technical details on a webpage, synthesizing them into a clear, structured summary. Your expertise allows you to adeptly identify olfactory notes, longevity, sillage and similar fragrances, providing a comprehensive analysis for any fragrance enthusiast."),
22
+ verbose=True,
23
+ tools=[self.scrape_tool],
24
+ allow_delegation=False,
25
+ llm=llm,
26
+ max_retries=3
27
+ )
28
+
29
+ self.reporter_agent = Agent(
30
+ role='Fragrance Expert Woman and Perfume Analysis Reporter',
31
+ goal='Produce a "Human Friendly" analysis containing specific graded evaluations and personalized recommendations based on the extracted perfume information.',
32
+ backstory=("You are a seasoned reporter with a passion for fragrances. You excel at transforming raw data about perfumes into engaging, well-structured, and informative reports. Your reports highlight key characteristics, unique selling points, and provide a holistic view of the fragrance, making it easy for enthusiasts to understand and appreciate. You are also an extraordinary woman, capable of providing insightful and personalized recommendations."),
33
+ verbose=True,
34
+ allow_delegation=False,
35
+ llm=llm,
36
+ max_retries=3
37
+ )
38
+
39
+ def kickoff(self, url: str) -> str:
40
+ research_task = Task(
41
+ description=(
42
+ f"""1. Scrape the content of the URL: {url} using the 'Stealth Web Scraper' tool with `website_url` as {url} and `css_element` as "#main-content". If the scraping tool fails or returns empty content ONCE, retry with `css_element` as "body". If it also fails with `css_element` as "body", then you MUST return the exact string "SCRAPING_FAILED".
43
+
44
+ 2. If scraping is successful, carefully analyze the entire page content to extract the following information:
45
+
46
+ - Resumo: Look for a general summary of the perfume, often found near the top or in introductory paragraphs, synthesizing user opinions if available.
47
+
48
+ - Acordes principais: Find the section listing 'Main Accords' or similar, and extract the list of accords (e.g., 'amadeirado', 'cítrico', 'floral').
49
+
50
+ - Pirâmide Olfativa: Identify sections for 'Top Notes', 'Middle Notes', and 'Base Notes'. Extract the notes for 'topo' (top), 'coracao' (heart), and 'fundo' (base) into a dictionary format.
51
+
52
+ - Longevidade: Locate user polls or reviews discussing longevity. Choose one of the following exact string values based on the overall sentiment: 'Fraca', 'Moderada', 'Longa', 'Eterna'.
53
+ - Projeção: Locate user polls or reviews discussing sillage/projection. Choose one of the following exact string values based on the overall sentiment: 'Íntima', 'Moderada', 'Forte', 'Enorme'.
54
+
55
+ - Este Perfume me Lembra do: Find the section titled "Este perfume me lembra do", and list the perfume names mentioned there.
56
+
57
+ - Resumo detalhado: Look for a section containing detailed user reviews, such as "Todas as Resenhas por Data" or similar, and synthesize a detailed summary from these reviews.
58
+
59
+ 3. Present the extracted information in a clear, structured format, ready for reporting. If any specific piece of information cannot be found, check again to make sure they are not found, after check again, if you truly do not find the info, state 'N/A' for that field. If the entire scraping process fails, return "SCRAPING_FAILED".
60
+ """
61
+ ),
62
+ agent=self.research_agent,
63
+ expected_output=(
64
+ """A structured text containing all the extracted information:
65
+ Resumo,
66
+ Acordes principais,
67
+ Pirâmide Olfativa,
68
+ Longevidade,
69
+ Projeção,
70
+ Este Perfume me Lembra do,
71
+ and Resumo detalhado.
72
+ Ensure Longevidade and Projeção use the exact specified string values.
73
+ If any information is not found, state 'N/A' for that specific field. If the scraping process fails entirely, return the exact string "SCRAPING_FAILED"."""
74
+ )
75
+ )
76
+
77
+ report_task = Task(
78
+ description=(
79
+ """With the extracted information, as a Fragrance Expert woman, your next step is to produce a "Human Friendly" analysis containing:\n"
80
+ "If the input you receive from the research agent is "SCRAPING_FAILED", you MUST stop and output only that same message.\n"
81
+ - Nível de "doçura": Ranging from 1 to 5\n
82
+ - Intensidade: Ranging from 1 to 5\n
83
+ - Fixação na minha pele: Ranging from 1 to 5\n
84
+ - Projeção: Ranging from 1 to 5\n
85
+ - Segue o estilo do perfume: Select the perfume that most matches this one, based on "Este Perfume me Lembra do" and "Resumo detalhado" extracted earlier\n
86
+ - Como ele é, na minha percepção: Based on your analyses, write a concise summary about "How do I see it". Where you give your opinion using info about the perfume grades, and etc.\n
87
+ - Eu indico para quem: Give two opinions about who would like it. Something like "gostam de fragrâncias cítricas e amadeiradas", "Querem um perfume forte para usar no inverno"\n
88
+ Your output must be a text containing the "Extraction" values and the "Process" values, in user friendly format."""
89
+ ),
90
+ agent=self.reporter_agent,
91
+ expected_output=(
92
+ """A comprehensive perfume analysis report in markdown format.
93
+ The report must include all extracted information (Resumo, Acordes principais, Pirâmide Olfativa, Longevidade, Projeção, Este Perfume me Lembra, Resumo detalhado)
94
+ and the "Human Friendly" analysis (Nível de "doçura", Intensidade, Fixação na minha pele, Projeção, Segue o estilo do perfume, Como ele é, na minha percepção, Eu indico para quem)."""
95
+ ),
96
+ context=[research_task]
97
+ )
98
+
99
+ crew = Crew(
100
+ agents=[self.research_agent, self.reporter_agent],
101
+ tasks=[research_task, report_task],
102
+ process=Process.sequential
103
+ )
104
+
105
+ print(f"Fragrantica Crew is kicking off for URL: {url}")
106
+ result = crew.kickoff()
107
+ if result == "SCRAPING_FAILED":
108
+ return result
109
+ return result
pyproject.toml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "crewai-agent"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "beautifulsoup4>=4.13.4",
9
+ "crewai>=0.148.0",
10
+ "crewai-tools>=0.55.0",
11
+ "gradio>=5.38.0",
12
+ "litellm>=1.72.6",
13
+ "playwright>=1.53.0",
14
+ "playwright-stealth>=2.0.0",
15
+ ]
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  crewai>=0.148.0
2
  crewai-tools>=0.55.0
3
  gradio>=5.38.0
4
- litellm
 
 
 
1
  crewai>=0.148.0
2
  crewai-tools>=0.55.0
3
  gradio>=5.38.0
4
+ litellm>=1.72.6
5
+ playwright>=1.53.0
6
+ playwright-stealth>=2.0.0
social_media_crew.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from crewai import Agent, Task, Crew, Process
3
+ from crewai_tools import ScrapeWebsiteTool
4
+ from shortener_tool import ShortenerTool
5
+
6
+ class SocialMediaCrew:
7
+ def __init__(self):
8
+ self.scrape_tool = ScrapeWebsiteTool()
9
+ self.shortener_tool = ShortenerTool()
10
+
11
+ self.product_analyst = Agent(
12
+ role='Product Analyst',
13
+ goal='Analyze the provided URL and extract key product information',
14
+ backstory=("""You are an expert in analyzing product pages and extracting the most important information.
15
+ You can identify the product name, the price, discount if any, its main features, and the target audience."""),
16
+ verbose=True,
17
+ tools=[self.scrape_tool, self.shortener_tool],
18
+ )
19
+
20
+ self.social_media_copywriter = Agent(
21
+ role='Social Media Copywriter',
22
+ goal='Create a compelling social media post in Portuguese to sell the product',
23
+ backstory=("""You are a creative copywriter specialized in the beauty and fragrance market.
24
+ You know how to craft posts that are engaging, persuasive, and tailored for a Portuguese-speaking audience.
25
+ You are an expert in using emojis and hashtags to increase engagement."""),
26
+ verbose=True,
27
+ )
28
+
29
+ def run_crew(self, product_url: str) -> str:
30
+ analyze_product_task = Task(
31
+ description=(f"""Using the 'scrape_tool', scrape the content of the URL: {product_url} and provide a summary of the product.
32
+ Focus on the product name, its key characteristics, the FINAL PRICE, any DISCOUNT available.
33
+ Then, use the 'URL Shortener Tool' to generate a short URL for {product_url}. If the shortener tool returns an error, use the original URL.
34
+ Finally, provide all this information, including the generated short URL (or the original if shortener failed)."""),
35
+ agent=self.product_analyst,
36
+ expected_output="A concise summary of the product including its name, key features, unique selling points, FINAL PRICE, any DISCOUNT available, and the SHORT SHAREABLE URL (or the original URL if shortener failed)."
37
+ )
38
+
39
+ create_post_task = Task(
40
+ description=("""Based on the product analysis, create a CONCISE and DIRECT social media post in Portuguese, suitable for a WhatsApp group.
41
+ The post should be exciting and highlight the main benefits of the perfume, including the FINAL PRICE, any DISCOUNT, and the SHORT SHAREABLE URL.
42
+ Ensure a URL is always present in the output. Include a clear call to action and a MAXIMUM of 2 relevant emojis. DO NOT include hashtags. Keep it short and impactful."""),
43
+ agent=self.social_media_copywriter,
44
+ expected_output="A short, direct, and impactful social media post in Portuguese for WhatsApp, including the FINAL PRICE, any DISCOUNT, the SHORT SHAREABLE URL, a call to action, and up to 2 emojis. No hashtags should be present. A URL must always be present in the final output.",
45
+ context=[analyze_product_task]
46
+ )
47
+
48
+ crew = Crew(
49
+ agents=[self.product_analyst, self.social_media_copywriter],
50
+ tasks=[analyze_product_task, create_post_task],
51
+ process=Process.sequential
52
+ )
53
+
54
+ print(f"Crew is kicking off for URL: {product_url}")
55
+ result = crew.kickoff()
56
+ return result
stealth_scrape_tool.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from playwright.async_api import async_playwright
3
+ from playwright_stealth import Stealth
4
+ from bs4 import BeautifulSoup
5
+ from crewai.tools import BaseTool
6
+
7
+ class StealthScrapeTool(BaseTool):
8
+ name: str = "Stealth Web Scraper"
9
+ description: str = "A tool for stealthily scraping content from a given URL using Playwright and a CSS selector."
10
+
11
+ async def _arun(self, website_url: str, css_element: str) -> str:
12
+ try:
13
+ async with Stealth().use_async(async_playwright()) as p:
14
+ browser = await p.chromium.launch(headless=True)
15
+ page = await browser.new_page()
16
+
17
+ await page.goto(website_url, timeout=120000)
18
+
19
+ # Wait for the specific element to be present
20
+ await page.wait_for_selector(css_element, timeout=60000)
21
+
22
+ html_content = await page.content()
23
+ soup = BeautifulSoup(html_content, 'html.parser')
24
+
25
+ target_element = soup.select_one(css_element)
26
+ if target_element:
27
+ return target_element.prettify()
28
+ else:
29
+ return f"Error: Could not find element with selector '{css_element}' on the page."
30
+ except Exception as e:
31
+ return f"Error during stealth web scraping: {e}"
32
+
33
+ def _run(self, website_url: str, css_element: str) -> str:
34
+ # This method is for synchronous execution, which is not ideal for Playwright.
35
+ # CrewAI typically calls _arun for async tools.
36
+ # For simplicity, we'll just call the async version here.
37
+ return asyncio.run(self._arun(website_url, css_element))
uv.lock ADDED
The diff for this file is too large to render. See raw diff