talha13213 committed on
Commit
d8c8769
·
1 Parent(s): 6ad738a
Files changed (2) hide show
  1. app.py +74 -0
  2. requirement.txt +5 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from dotenv import load_dotenv
4
+ from bs4 import BeautifulSoup
5
+ from openai import OpenAI
6
+ import gradio as gr
7
+
8
# Load settings from a local .env file, then read the OpenAI key from the
# process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Sanity-check the key and print exactly one status line. The happy path is
# tested first; the remaining branches report the first problem found, in the
# order: missing key, unexpected prefix, surrounding whitespace.
if api_key and api_key.startswith("sk-proj-") and api_key == api_key.strip():
    print("API key found and looks good so far!")
elif not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")

# Shared API client used by summarize_website(); no key is passed explicitly.
openai = OpenAI()

# Request headers sent with every page fetch: a desktop-browser User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}
25
+
26
class Website:
    """A fetched web page reduced to its title and visible text.

    Attributes:
        url: The URL that was requested.
        title: Text of the page's ``<title>`` element, or
            ``"No title found"`` when the page has none.
        text: Text of the page's ``<body>`` with script, style, img and
            input elements removed; empty when the page has no ``<body>``.
    """

    def __init__(self, url, timeout=30):
        """Download *url* and extract its title and body text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        self.url = url
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None for an empty <title> or one with nested tags,
        # so fall back to the placeholder in that case as well.
        title_text = soup.title.string if soup.title else None
        self.title = "No title found" if title_text is None else title_text

        # Non-HTML or fragmentary responses may have no <body> at all.
        body = soup.body
        if body is None:
            self.text = ""
            return

        # Drop elements that carry no readable content before extracting text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)
35
+
36
# System instruction sent with every summarization request.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)
39
+
40
def user_prompt(website):
    """Build the user-role prompt asking for a summary of *website*.

    Args:
        website: Object exposing ``title`` and ``text`` attributes.

    Returns:
        A single string: an intro naming the title, the summarization
        instructions, then the page text.
    """
    intro = f'you are looking at the website whose title is {website.title}'
    instructions = (
        '\nThe contents of this website is as follows; '
        'please provide a short summary of this website in markdown. '
        'If it includes news or announcements, then summarize these too.\n\n'
    )
    page_body = f'the content of the website are {website.text}'
    return "".join((intro, instructions, page_body))
47
+
48
def message(web):
    """Return the chat message list for summarizing *web*.

    The list holds the system instruction followed by the user prompt
    built from the page.
    """
    system_turn = {"role": "system", "content": system_prompt}
    user_turn = {"role": "user", "content": user_prompt(web)}
    return [system_turn, user_turn]
53
+
54
def summarize_website(website_url):
    """Fetch *website_url* and return a model-written summary of it.

    Any failure (fetching, parsing, or the API call) is reported as an
    ``"An error occurred: ..."`` string instead of being raised, so the
    UI always has text to display.
    """
    try:
        page = Website(website_url)
        completion = openai.chat.completions.create(
            model='gpt-4o-mini',
            messages=message(page),
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"An error occurred: {exc}"
64
+
65
# Single-line input box where the user types the page address.
_url_box = gr.Textbox(lines=1, placeholder="Enter website URL here...")

# Gradio UI: one URL textbox in, markdown summary out.
iface = gr.Interface(
    fn=summarize_website,
    inputs=_url_box,
    outputs="markdown",
    title="Website Summarizer",
    description="Enter a URL and get a summary of the website content.",
)

# Start the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
requirement.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai
2
+ python-dotenv
3
+ beautifulsoup4
4
+ requests
5
+ gradio