Omnibus committed on
Commit
0001c43
·
verified ·
1 Parent(s): 0147802

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -31
app.py CHANGED
@@ -35,39 +35,16 @@ def find_all(url):
35
  source = requests.get(url)
36
  #source = urllib.request.urlopen(url).read()
37
  soup = bs4.BeautifulSoup(source.content,'lxml')
38
- # title of the page
39
- print(soup.title)
40
- # get attributes:
41
- print(soup.title.name)
42
- # get values:
43
- print(soup.title.string)
44
- # beginning navigation:
45
- print(soup.title.parent.name)
46
- #rawp.append([tag.name for tag in soup.find_all()] )
47
- print([tag.name for tag in soup.find_all()])
48
- #rawp=(f'RAW TEXT RETURNED: {soup.text}')
49
- rawp=(f'RAW HTML RETURNED: {soup}')
50
  out.append(rawp)
 
51
  q=("a","p","span","content","article")
52
- for p in soup.find_all(q):
53
- out.append([{q:p.string,"parent":p.parent.name,"previous":[p.previous],"first-child":[b.name for b in p.children],"content":p}])
54
- #print (f'OUT :: {out}')
55
- '''
56
- c=0
57
- out = str(out)
58
- rl = len(out)
59
- print(f'rl:: {rl}')
60
- #for ea in out:
61
- for i in str(out):
62
- if i == " " or i=="," or i=="\n":
63
- c +=1
64
- print (f'c:: {c}')
65
- if rl > MAX_DATA:
66
- print("compressing...")
67
- rawp = compress_data(c,purpose,task,out)
68
- print (rawp)
69
- print (f'out:: {out}')
70
- '''
71
  print(rawp)
72
  return True, rawp
73
  else:
 
35
  source = requests.get(url)
36
  #source = urllib.request.urlopen(url).read()
37
  soup = bs4.BeautifulSoup(source.content,'lxml')
38
+
39
+ rawp=(f'RAW TEXT RETURNED: {soup.text}')
40
+ cnt=0
41
+ cnt+=len(rawp)
 
 
 
 
 
 
 
 
42
  out.append(rawp)
43
+ out.append("HTML fragments: ")
44
  q=("a","p","span","content","article")
45
+ for p in soup.find_all("a"):
46
+ out.append([{"LINK TITLE":p.get('title'),"URL":p.get('href'),"STRING":p.string}])
47
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  print(rawp)
49
  return True, rawp
50
  else: