NikosKprl committed · Commit 9cb47cc · verified · 1 Parent(s): bac6155

Update ✨Entity Linking Application✨.py

Files changed (1)
  1. ✨Entity Linking Application✨.py +21 -36
✨Entity Linking Application✨.py CHANGED
@@ -13,6 +13,7 @@ import streamlit as st
 import time
 from openai import OpenAI
 import sys
+from googlesearch import search
 
 folder_path = '/home/user/app/qids_folder'
 
@@ -49,21 +50,6 @@ else:
     st.warning("Please enter an API token to proceed.")
 
 
-async def fetch_url(session, url):
-    pageids_list = []
-    async with session.get(url) as response:
-        x = await response.text()
-        objective_list = x.split('"objectiveResults\\":')[-1].split(',\\"wikipediaResults\\"')[0].replace('\\\\\\"', "").replace("\\", "")
-        wikipedia_list = x.split(',\\"wikipediaResults\\":')[-1].split(',\\"data-sentry-element\\"')[0].replace('\\\\\\"', "").replace("\\", "")
-        data_1 = json.loads(objective_list)
-        data_2 = json.loads(wikipedia_list)
-        for i in data_1:
-            pageids_list.append(i.get("page_id"))
-        for i in data_2:
-            pageids_list.append(i.get("pageid"))
-        return pageids_list
-
-
 async def fetch_json(url, session):
     async with session.get(url) as response:
         return await response.json()
@@ -71,26 +57,25 @@ async def fetch_json(url, session):
 async def combination_method(name, session):
     async with aiohttp.ClientSession() as session:
         data = set()
-        new_name = name.replace("+", " ").split()
+        new_name = name.split()
         x = itertools_combinations(new_name, 2)
         for i in x:
-            new_word = (i[0] + " " + i[1]).replace(" ", "+")
-            url = f"https://www.objective.inc/demos/wikipedia?query={new_word}"
-            page_source = await fetch_url(session, url)
-            for i in page_source:
-                data.add(i)
+            new_word = (i[0] + " " + i[1])
+            query = f"{new_word} site:en.wikipedia.org"
+            search_results = search(query, num_results=12, lang="en")
+            for i in search_results:
+                data.add(i.split("/")[-1])
         return data
 
 async def single_method(name, session):
     async with aiohttp.ClientSession() as session:
         data = set()
-        new_name = name.replace("+", " ").replace("-", " ").replace("/", " ").split()
+        new_name = name.replace("-", " ").replace("/", " ").split()
         for i in new_name:
-            new_word = i.replace(" ", "+")
-            url = f"https://www.objective.inc/demos/wikipedia?query={new_word}"
-            page_source = await fetch_url(session, url)
-            for i in page_source:
-                data.add(i)
+            query = f"{i} site:en.wikipedia.org"
+            search_results = search(query, num_results=12, lang="en")
+            for i in search_results:
+                data.add(i.split("/")[-1])
         return data
 
 async def mains(name, single, combi):
@@ -99,10 +84,10 @@ async def mains(name, single, combi):
     qids = set()
 
     async with aiohttp.ClientSession() as session:
-        url = f"https://www.objective.inc/demos/wikipedia?query={name}"
-        page_source = await fetch_url(session, url)
-        for i in page_source:
-            data.add(i)
+        query = f"{name} site:en.wikipedia.org"
+        search_results = search(query, num_results=30, lang="en")
+        for i in search_results:
+            data.add(i.split("/")[-1])
 
         wikipedia_url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={name}&srlimit=1&srprop=&srenablerewrites=True&srinfo=suggestion&format=json"
         json_data = await fetch_json(wikipedia_url, session)
@@ -113,7 +98,7 @@ async def mains(name, single, combi):
         json_suggestion = await fetch_json(suggested_url, session)
         results = json_suggestion.get('query', {}).get('search')
         for i in results:
-            data.add(int(i.get('pageid')))
+            data.add(i.get('title'))
 
         # Handle disambiguation links
         if data != {0}:
@@ -133,28 +118,28 @@ async def mains(name, single, combi):
             if ":" not in i:
                 wikipedia_disamb = f"https://en.wikipedia.org/w/api.php?action=query&format=json&titles={i}&indexpageids"
                 json_id = await fetch_json(wikipedia_disamb, session)
-                real_title = json_id.get('query').get('pageids')
-                disam_data.add(int(real_title[0]))
+                pages = json_id.get('query', {}).get('pages', {})
+                disam_data.add(next(iter(pages.values()), {}).get('title'))
             else:
                 disam_data.add(ids)
 
         # Makes combinations of the name
         if combi == "Yes":
-            if len(name.replace("+", " ").replace("-", " ").split()) >= 3:
+            if len(name.replace("-", " ").split()) >= 3:
                 combination_names = await combination_method(name, session)
                 for i in combination_names:
                     disam_data.add(i)
 
         # Checks every word alone
         if single == "Yes":
-            if len(name.replace("+", " ").replace("-", " ").replace("/", " ").split()) >= 2:
+            if len(name.replace("-", " ").replace("/", " ").split()) >= 2:
                 singles = await single_method(name, session)
                 for i in singles:
                     disam_data.add(i)
 
         for ids in disam_data:
             try:
-                wikibase_url = f"https://en.wikipedia.org/w/api.php?action=query&pageids={ids}&prop=pageprops&format=json"
+                wikibase_url = f"https://en.wikipedia.org/w/api.php?action=query&titles={ids}&prop=pageprops&format=json"
                 json_qid = await fetch_json(wikibase_url, session)
-                wikidata_qid = json_qid.get('query', {}).get('pages', {}).get(str(ids), {}).get('pageprops', {}).get('wikibase_item', {})
+                wikidata_qid = next(iter(json_qid.get('query', {}).get('pages', {}).values()), {}).get('pageprops', {}).get('wikibase_item')
                 if wikidata_qid:
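For reference, the replacement lookup strategy distills to the sketch below: a minimal, self-contained version assuming the googlesearch-python package (the likely source of "from googlesearch import search"). It queries Google restricted to en.wikipedia.org and treats the last path segment of each result URL as a candidate article title; the helper name wikipedia_title_candidates is illustrative, not part of the app. Binding the result of search() to a fresh name avoids shadowing the imported function, which would otherwise raise UnboundLocalError on the first call.

# Hedged sketch of the new lookup, assuming googlesearch-python
# (pip install googlesearch-python).
from googlesearch import search

def wikipedia_title_candidates(name, num_results=12):
    # Restrict the Google query to English Wikipedia.
    query = f"{name} site:en.wikipedia.org"
    candidates = set()
    for result_url in search(query, num_results=num_results, lang="en"):
        # e.g. "https://en.wikipedia.org/wiki/Barack_Obama" -> "Barack_Obama"
        candidates.add(result_url.split("/")[-1])
    return candidates

Since search() is a blocking call, the surrounding async wrappers no longer gain anything from aiohttp for this step.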
 
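The Wikipedia fallback in mains() goes through the MediaWiki full-text search API. Below is a hedged, synchronous sketch of that call; requests stands in for the app's aiohttp session, and the helper name is illustrative.

import requests

def top_search_title(name):
    # Mirrors the list=search query built in mains(): top hit only,
    # with rewrites and spelling suggestions enabled.
    params = {
        "action": "query",
        "list": "search",
        "srsearch": name,
        "srlimit": 1,
        "srprop": "",
        "srenablerewrites": True,
        "srinfo": "suggestion",
        "format": "json",
    }
    r = requests.get("https://en.wikipedia.org/w/api.php", params=params)
    hits = r.json().get("query", {}).get("search", [])
    return hits[0]["title"] if hits else None

With the commit's switch from pageids to titles, this returns exactly the kind of value the data set now collects.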
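Finally, because the pageprops lookup now queries by titles= rather than pageids=, the "pages" object in the response is still keyed by page id, not by the queried title, so indexing it with the title would always miss. A hedged sketch of a title-to-QID lookup that accounts for this (the helper name and synchronous requests usage are illustrative):

import requests

def title_to_qid(title):
    params = {"action": "query", "titles": title, "prop": "pageprops", "format": "json"}
    data = requests.get("https://en.wikipedia.org/w/api.php", params=params).json()
    # "pages" is keyed by page id, so take the first page object
    # instead of indexing by the title that was queried.
    pages = data.get("query", {}).get("pages", {})
    page = next(iter(pages.values()), {})
    return page.get("pageprops", {}).get("wikibase_item")

Example: title_to_qid("Barack_Obama") should return "Q76".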