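"""Scrape posts from https://huggingface.co/posts and classify each one as
"chat bot", "AI dev", or "other" using Meta-Llama-3-8B-Instruct via the
Hugging Face Inference API, then expose the classifier through a Gradio UI."""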
import os
import requests
from bs4 import BeautifulSoup
import gradio as gr

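# Configuration: read the API token from the TOKEN environment variable
# (e.g. a secret on the hosting Space) and target the hosted Llama 3 endpoint.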
api_token = os.environ.get("TOKEN")
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
headers = {"Authorization": f"Bearer {api_token}"}

def query(payload):
    # POST the payload to the Inference API; on success the reply is a list
    # of generations, on failure a dict with an "error" message.
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

def analyze_sentiment(text):
    # Ask Llama 3 to classify the text. return_full_text=False requests only
    # the completion; with the default full echo, the prompt itself contains
    # the category names and the substring checks below would always match.
    output = query({
        "inputs": f'''<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
You are going to analyse the prompt I give you and tell me whether it is talking about "chat bot", "AI dev", or something else.
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
{text}
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
''',
        "parameters": {"return_full_text": False}
    })

    if isinstance(output, list) and len(output) > 0:
        response = output[0].get('generated_text', '').strip().lower()

        if "chat bot" in response:
            return "chat bot"
        elif "ai dev" in response:
            return "AI dev"
        else:
            return "other"

    # On failure (e.g. the model is still loading), the API returns a dict
    # with an "error" key; return an explicit label instead of None.
    return "error"

def scrape_huggingface_posts(url):
    response = requests.get(url)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Adjust this selector to match the actual structure of the page
    posts = soup.find_all('div', class_='space-y-3 pl-7')

    extracted_posts = []
    for post in posts:
        # Extract the relevant fields from each post. The class names below
        # are guesses; skip a post whose selectors do not match instead of
        # crashing on None.
        title = post.find('h2', class_='post-title')
        content = post.find('div', class_='post-content')
        author = post.find('span', class_='post-author')
        if not (title and content and author):
            continue

        extracted_posts.append({
            'title': title.text.strip(),
            'content': content.text.strip(),
            'author': author.text.strip()
        })

    return extracted_posts

# Using the functions
url = "https://huggingface.co/posts"
all_posts = scrape_huggingface_posts(url)

# Analyse each post
for post in all_posts:
    category = analyze_sentiment(post['content'])
    print(f"Post title: {post['title']}")
    print(f"Author: {post['author']}")
    print(f"Category: {category}")
    print("---")

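# Note: the scrape-and-classify pass above runs once at import time, so it
# executes (and blocks) before the Gradio app below starts serving.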
# Gradio interface (if you want to keep it)
demo = gr.Interface(
    fn=analyze_sentiment,
    inputs="text",
    outputs="text"
)

demo.launch()