File size: 1,744 Bytes
03c0888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

import requests, base64, os

# Basic example: crawl one page and ask the service to include a screenshot.
data = {
    "urls": ["https://www.nbcnews.com/business"],
    "screenshot": True,
}

# Without a timeout, requests waits forever if the server never answers.
# 120 seconds is a generous ceiling for a remote crawl; adjust as needed.
response = requests.post("https://crawl4ai.com/crawl", json=data, timeout=120)
# Surface HTTP errors (4xx/5xx) here instead of failing later with an
# unrelated JSON/KeyError while indexing into the body.
response.raise_for_status()
result = response.json()['results'][0]
print(result.keys())
# dict_keys(['url', 'html', 'success', 'cleaned_html', 'media', 
# 'links', 'screenshot', 'markdown', 'extracted_content', 
# 'metadata', 'error_message'])

# Only create screenshot.png when there is something to decode:
# b64decode(None) raises TypeError, and opening the file first would leave
# an empty or truncated file behind.
if result.get('screenshot'):
    with open("screenshot.png", "wb") as f:
        f.write(base64.b64decode(result['screenshot']))
else:
    print("No screenshot in result:", result.get('error_message'))
    
# Example payload: restrict the returned content with a CSS selector.
# This only rebinds `data`; nothing in the visible script posts it.
data = dict(
    urls=["https://www.nbcnews.com/business"],
    css_selector="article",
    screenshot=True,
)

# Example payload: run a JavaScript snippet on the page before extraction.
# The snippet looks for a button whose text contains 'Load More' and clicks
# it when one is found.
data = {"urls": ["https://www.nbcnews.com/business"]}
data["screenshot"] = True
# The script is assembled line by line; the leading empty entry and the
# trailing indent-only entry keep the text identical to a triple-quoted
# block that opens and closes on its own lines.
data["js"] = ["\n".join([
    "",
    "    const loadMoreButton = Array.from(document.querySelectorAll('button')).",
    "    find(button => button.textContent.includes('Load More'));",
    "    loadMoreButton && loadMoreButton.click();",
    "    ",
])]

# Example payload: choose a named extraction strategy and pass it arguments.
# Here the strategy is "CosineStrategy" with a semantic filter phrase.
data = dict(
    urls=["https://www.nbcnews.com/business"],
    extraction_strategy="CosineStrategy",
    extraction_strategy_args=dict(semantic_filter="inflation rent prices"),
)

# Example payload: LLM-based extraction.
# The API token is read from the GROQ_API_KEY environment variable and is
# None when that variable is unset.
data = {"urls": ["https://www.nbcnews.com/business"]}
data["extraction_strategy"] = "LLMExtractionStrategy"
data["extraction_strategy_args"] = {
    "provider": "groq/llama3-8b-8192",
    "api_token": os.environ.get("GROQ_API_KEY"),
    # Spelled with explicit escapes so the embedded newline, the trailing
    # space before it, and the continuation indent are all visible.
    "instruction": (
        "I am interested in only financial news, \n"
        "        and translate them in French."
    ),
}