Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- README.md +5 -5
- app.py +59 -0
- requirements.txt +3 -0
- scrape_fake_app.py +85 -0
README.md
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
---
|
| 2 |
title: Fake App Scraper
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.33.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
-
short_description:
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
title: Fake App Scraper
|
| 3 |
+
emoji: 📈
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
sdk_version: 5.33.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
license: mit
|
| 11 |
+
short_description: uses playwright to scrape a fake app hosted on vercel
|
| 12 |
+
tags: ["mcp-server-track"]
|
| 13 |
---
|
| 14 |
|
|
|
app.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from scrape_fake_app import get_homework,get_timetable
|
| 3 |
+
|
| 4 |
+
def fetch_homework(date: str = 'today') -> str:
    """
    Fetch the homework listed on the Fake App dashboard.

    Args:
        date: any string, default "today" (accepted by the UI/tool signature; it is not forwarded to the scraper)
    Returns:
        The string describing the homeworks
    """
    # The scraper takes no arguments, so `date` is intentionally unused here.
    homework_text = get_homework()
    return homework_text
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def fetch_timetable(date: str = 'today') -> str:
    """
    Fetch the timetable listed on the Fake App dashboard.

    Args:
        date: any string, default "today" (accepted by the UI/tool signature; it is not forwarded to the scraper)
    Returns:
        The string describing the timetable
    """
    # The scraper takes no arguments, so `date` is intentionally unused here.
    timetable_text = get_timetable()
    return timetable_text
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# Dashboard layout: a heading, a hidden "date" input that is passed to both
# callbacks, and one column (button + result box) per scraped card.
with gr.Blocks() as demo:

    # Heading and short description of what the app does.
    with gr.Row():
        gr.Markdown("""## Fake App Dashboard
Use playwright to scrape homework and timetable from [fake-app-omega.vercel.app](https://fake-app-omega.vercel.app)
""")

    # Not shown in the UI; it only supplies the `date` argument of the callbacks.
    with gr.Row():
        date = gr.Textbox(label="date", visible=False)

    with gr.Row():
        # Left column: homework.
        with gr.Column():
            homeworks_btn = gr.Button("Homeworks")
            homeworks_output = gr.Textbox(label="Homeworks Result", lines=10)

        # Right column: timetable.
        with gr.Column():
            timetable_btn = gr.Button("Timetable")
            timetable_output = gr.Textbox(label="Timetable Result", lines=10)

    # Each button runs its fetcher and writes the returned text to its box.
    homeworks_btn.click(
        fn=fetch_homework,
        inputs=[date],
        outputs=homeworks_output,
    )

    timetable_btn.click(
        fn=fetch_timetable,
        inputs=[date],
        outputs=timetable_output,
    )

# Start the web app with the MCP server enabled.
demo.launch(mcp_server=True)
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio[mcp]
|
| 2 |
+
python-dotenv
|
| 3 |
+
playwright
|
scrape_fake_app.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from playwright.sync_api import sync_playwright
|
| 2 |
+
import os
|
| 3 |
+
from dotenv import load_dotenv
|
| 4 |
+
|
| 5 |
+
def load_credentials() -> "tuple[str | None, str | None, str | None]":
    """Read the Fake App connection settings from the environment.

    ``load_dotenv()`` is called first so that variables defined in a ``.env``
    file (if python-dotenv finds one) are available to ``os.getenv``.

    Returns:
        A ``(url, username, password)`` tuple taken from ``FAKE_APP_URL``,
        ``FAKE_APP_USERNAME`` and ``FAKE_APP_PASSWORD``. An element is
        ``None`` when its variable is not set.
    """
    # The annotation is a string so it is never evaluated at runtime and
    # works on any Python 3 version. The previous annotation said ``dict``,
    # which did not match the tuple that callers unpack.
    load_dotenv()
    url = os.getenv('FAKE_APP_URL')
    username = os.getenv('FAKE_APP_USERNAME')
    password = os.getenv('FAKE_APP_PASSWORD')
    return (url, username, password)
|
| 11 |
+
|
| 12 |
+
def extract_homework_text(page) -> str:
    """Render the homework card of the given page as plain text.

    Args:
        page: object exposing ``get_by_title`` (presumably a Playwright
            ``Page`` already showing the dashboard; confirm against callers).

    Returns:
        A ``"Homework:"`` header followed, for every section of the card, by
        the section's ``h3`` heading, its list items (each prefixed with a
        space) and a blank separator line. The whole text is stripped of
        leading and trailing whitespace.
    """
    homework_card = page.get_by_title("homework")
    groups = homework_card.locator("[data-slot='card-content'] section > div").all()

    lines = ["Homework:\n"]
    for group in groups:
        lines.append(group.locator("h3").inner_text())
        # One line per list item, using the item's full inner text.
        lines.extend(
            f" {entry.inner_text().strip()}"
            for entry in group.locator("ul > li").all()
        )
        lines.append("")  # Blank line between sections

    return "\n".join(lines).strip()
|
| 28 |
+
|
| 29 |
+
def extract_timetable_text(page) -> str:
    """Render the timetable card of the given page as plain text.

    Args:
        page: object exposing ``get_by_title`` (presumably a Playwright
            ``Page`` already showing the dashboard; confirm against callers).

    Returns:
        A ``"Timetable:"`` header followed by one line per list item of the
        card. Items containing ``span`` elements are rendered as the
        space-joined text of those spans; items without any (e.g. a plain
        "Lunch break" entry) are rendered as their own inner text. The whole
        text is stripped of leading and trailing whitespace.
    """
    card = page.get_by_title("timetable")
    entries = card.locator("[data-slot='card-content'] ul > li").all()

    lines = ["Timetable:\n"]
    for entry in entries:
        # Query the spans once; an empty list means a plain-text item.
        spans = entry.locator("span").all()
        if spans:
            lines.append(" ".join(span.inner_text().strip() for span in spans))
        else:
            lines.append(entry.inner_text().strip())

    return "\n".join(lines).strip()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# print(URL,USERNAME,PASSWORD)
|
| 47 |
+
def _scrape_dashboard(extract) -> str:
    """Log in to the Fake App and return ``extract(page)`` for the dashboard.

    Args:
        extract: callable taking the logged-in page and returning the text
            to hand back to the caller.

    Returns:
        Whatever ``extract`` returns for the dashboard page.

    Raises:
        RuntimeError: if any of the ``FAKE_APP_*`` settings is unset.
    """
    url, username, password = load_credentials()

    # Fail early with a clear message instead of an obscure browser error.
    settings = (
        ("FAKE_APP_URL", url),
        ("FAKE_APP_USERNAME", username),
        ("FAKE_APP_PASSWORD", password),
    )
    missing = [name for name, value in settings if value is None]
    if missing:
        raise RuntimeError(
            "Missing environment variable(s): " + ", ".join(missing)
        )

    with sync_playwright() as playwright:
        browser = playwright.firefox.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url, wait_until="domcontentloaded")

            # Submit the login form and wait for the redirect to the dashboard.
            page.get_by_role('textbox', name='username').fill(username)
            page.get_by_role('textbox', name='password').fill(password)
            page.get_by_role('button', name='login').click()
            page.wait_for_url("**/dashboard")

            return extract(page)
        finally:
            # Close the browser even when login or extraction raises.
            browser.close()


def get_homework() -> str:
    """Log in to the Fake App and return the homework card as plain text."""
    return _scrape_dashboard(extract_homework_text)


def get_timetable() -> str:
    """Log in to the Fake App and return the timetable card as plain text."""
    return _scrape_dashboard(extract_timetable_text)
|
| 81 |
+
|
| 82 |
+
if __name__ == "__main__":
    # Manual smoke test: print both scraped cards, separated by a blank line.
    for position, fetch in enumerate((get_homework, get_timetable)):
        if position:
            print()
        print(fetch())
|