import base64
import os

import requests

# Example 1: request a crawl of a single page with a screenshot and save the
# returned image to disk.
data = {
    "urls": ["https://www.nbcnews.com/business"],
    "screenshot": True,
}

# A timeout keeps the script from hanging forever if the service never answers.
# NOTE(review): 120 s is a guess at a generous upper bound for a crawl that
# also renders a screenshot -- tune as needed.
response = requests.post("https://crawl4ai.com/crawl", json=data, timeout=120)
# Fail loudly on HTTP errors instead of trying to parse an error body as JSON.
response.raise_for_status()

# The script reads the first entry of the "results" list in the JSON response.
result = response.json()["results"][0]
print(result.keys())

# Look the screenshot up *before* opening the output file so that a missing
# value does not leave an empty screenshot.png behind.
screenshot_b64 = result.get("screenshot")
if screenshot_b64:
    # The screenshot is base64 text; decode it to raw bytes for the PNG file.
    with open("screenshot.png", "wb") as f:
        f.write(base64.b64decode(screenshot_b64))
else:
    print("No screenshot in the response; screenshot.png was not written.")

# Example 2: same page, with a CSS selector added to the request.
# NOTE(review): presumably "css_selector" limits the crawl output to matching
# elements (<article> here) -- confirm against the service documentation.
# Within the visible script this payload is only assigned, not sent.
data = {
    "urls": [
        "https://www.nbcnews.com/business",
    ],
    "css_selector": "article",
    "screenshot": True,
}

# Example 3: pass a JavaScript snippet along with the crawl request.
# The snippet looks for a button whose text contains "Load More" and clicks it
# if one is found.
# NOTE(review): presumably the service runs each "js" entry in the page before
# extracting content -- confirm against the service documentation.
# Within the visible script this payload is only assigned, not sent.
data = {
    "urls": [
        "https://www.nbcnews.com/business",
    ],
    "screenshot": True,
    "js": ["""
    const loadMoreButton = Array.from(document.querySelectorAll('button')).
        find(button => button.textContent.includes('Load More'));
    loadMoreButton && loadMoreButton.click();
    """],
}

# Example 4: select the "CosineStrategy" extraction strategy and give it a
# free-text semantic filter.
# NOTE(review): presumably the filter keeps content related to the given
# phrase -- confirm against the service documentation.
# Within the visible script this payload is only assigned, not sent.
data = {
    "urls": [
        "https://www.nbcnews.com/business",
    ],
    "extraction_strategy": "CosineStrategy",
    "extraction_strategy_args": {
        "semantic_filter": "inflation rent prices",
    },
}

# Example 5: select the "LLMExtractionStrategy" extraction strategy with a
# provider, an API token and a natural-language instruction.
# The token is read from the GROQ_API_KEY environment variable; os.environ.get
# returns None when the variable is unset, so "api_token" may be None.
# Within the visible script this payload is only assigned, not sent.
data = {
    "urls": [
        "https://www.nbcnews.com/business",
    ],
    "extraction_strategy": "LLMExtractionStrategy",
    "extraction_strategy_args": {
        "provider": "groq/llama3-8b-8192",
        "api_token": os.environ.get("GROQ_API_KEY"),
        # Two-line instruction; the line break of the original is kept.
        "instruction": (
            "I am interested in only financial news,\n"
            "and translate them in French."
        ),
    },
}
