ginipick committed on
Commit
a9aa021
·
verified ·
1 Parent(s): 55d59a6

Create app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +280 -0
app-backup.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import gradio as gr
3
+ from phi3_instruct_graph import MODEL_LIST, Phi3InstructGraph
4
+ import rapidjson
5
+ from pyvis.network import Network
6
+ import networkx as nx
7
+ import spacy
8
+ from spacy import displacy
9
+ from spacy.tokens import Span
10
+ import random
11
+ from tqdm import tqdm
12
+
13
# Constants
# TITLE is used both as the Blocks page title and as the top-level Markdown heading.
TITLE = "🌐 GraphMind: Phi-3 Instruct Graph Explorer"
# SUBTITLE is rendered as a Markdown line directly under the heading.
SUBTITLE = "✨ Extract and visualize knowledge graphs from any text in multiple languages"

# Custom CSS for styling
# Passed to gr.Blocks(css=...): sets the container font stack and applies the
# #6366f1 accent colour to primary and secondary buttons.
CUSTOM_CSS = """
.gradio-container {
font-family: 'Inter', 'Segoe UI', Roboto, sans-serif;
}
.gr-button-primary {
background-color: #6366f1 !important;
}
.gr-button-secondary {
border-color: #6366f1 !important;
color: #6366f1 !important;
}
"""
30
+
31
+ # Color utilities
32
def get_random_light_color():
    """Return a random light colour as a ``#rrggbb`` hex string.

    Each of the red, green and blue channels is drawn independently from the
    inclusive range 140..255, which keeps the colour on the light side.
    """
    channels = [random.randint(140, 255) for _ in range(3)]
    return "#" + "".join(f"{level:02x}" for level in channels)
37
+
38
+ # Text preprocessing
39
def handle_text(text):
    """Collapse every run of whitespace in *text* into a single space.

    Leading and trailing whitespace disappears as a by-product of splitting.
    """
    words = text.split()
    return " ".join(words)
41
+
42
+ # Main processing functions
43
@spaces.GPU
def extract(text, model):
    """Run the selected graph-extraction model on *text* and parse its output.

    Args:
        text: Input passage to analyse.
        model: Model identifier forwarded to ``Phi3InstructGraph``.

    Returns:
        The object obtained by parsing the model's output with ``rapidjson``.

    Raises:
        gr.Error: If building the model, running extraction, or parsing the
            result raises any exception; the original message is embedded.
    """
    try:
        graph_model = Phi3InstructGraph(model=model)
        raw_output = graph_model.extract(text)
        parsed = rapidjson.loads(raw_output)
    except Exception as e:
        raise gr.Error(f"Extraction error: {str(e)}")
    return parsed
51
+
52
def find_token_indices(doc, substring, text):
    """Find occurrences of *substring* in *text* that align with token boundaries.

    Args:
        doc: Iterable of tokens, each exposing ``idx`` (character offset of the
            token within *text*), ``i`` (token index) and ``len()`` (token
            length in characters) -- for example a spaCy ``Doc`` built from
            *text*.
        substring: The text to look for. An empty or ``None`` value yields no
            matches.
        text: The string that *doc* was tokenized from.

    Returns:
        A list of ``{"start": int, "end": int}`` dicts holding token indices
        (``end`` is exclusive), one per non-overlapping occurrence whose first
        character starts a token and whose last character ends a token.
        Occurrences that begin or end inside a token are skipped.
    """
    # An empty needle matches at every offset without advancing, which would
    # make the search loop below spin forever.
    if not substring:
        return []

    # Index the token boundaries once rather than rescanning the whole doc
    # for every occurrence of the substring.
    token_at_start = {}
    token_after_end = {}
    for token in doc:
        token_at_start[token.idx] = token.i
        token_after_end[token.idx + len(token)] = token.i + 1

    result = []
    width = len(substring)
    start_index = text.find(substring)
    while start_index != -1:
        end_index = start_index + width
        start_token = token_at_start.get(start_index)
        end_token = token_after_end.get(end_index)
        if start_token is not None and end_token is not None:
            result.append({"start": start_token, "end": end_token})
        # Resume after this match so reported occurrences never overlap.
        start_index = text.find(substring, end_index)

    return result
77
+
78
def create_custom_entity_viz(data, full_text):
    """Render *full_text* with the extracted entities highlighted via displaCy.

    Args:
        data: Mapping with a ``"nodes"`` list; each node's ``"id"`` is searched
            for in the text and its ``"type"`` is used as the span label.
        full_text: The text the nodes were extracted from.

    Returns:
        The HTML markup produced by ``displacy.render`` in ``"span"`` style.
    """
    # A blank "xx" pipeline is used purely for tokenization.
    doc = spacy.blank("xx")(full_text)
    token_count = len(doc)

    accepted = []
    palette = {}
    for node in data["nodes"]:
        for match in find_token_indices(doc, node["id"], full_text):
            first, stop = match["start"], match["end"]

            # Ignore ranges that fall outside the document.
            if first >= token_count or stop > token_count:
                continue

            # Ignore any range that intersects a span accepted earlier.
            if any(prev.start < stop and first < prev.end for prev in accepted):
                continue

            entity_type = node["type"]
            accepted.append(Span(doc, first, stop, label=entity_type))
            # One colour per entity type, chosen the first time it is seen.
            if entity_type not in palette:
                palette[entity_type] = get_random_light_color()

    doc.set_ents(accepted, default="unmodified")
    doc.spans["sc"] = accepted

    render_options = {
        "colors": palette,
        "ents": list(palette.keys()),
        "style": "ent",
        "manual": True,
    }
    return displacy.render(doc, style="span", options=render_options)
111
+
112
def create_graph(json_data):
    """Render the extracted graph as an interactive pyvis network in an iframe.

    Args:
        json_data: Mapping with a ``"nodes"`` list (each item provides ``"id"``,
            ``"type"`` and ``"detailed_type"``) and an ``"edges"`` list (each
            item provides ``"from"``, ``"to"`` and ``"label"``).

    Returns:
        An ``<iframe>`` HTML string whose ``srcdoc`` attribute carries the
        generated network page.
    """
    # Imported locally under its function name so it cannot clash with a
    # variable called ``html``.
    from html import escape

    # A directed graph keeps each edge's from -> to orientation. With an
    # undirected graph the orientation handed to pyvis follows node insertion
    # order, so arrows could point the wrong way and A->B / B->A would merge.
    G = nx.DiGraph()

    # Add nodes with tooltips
    for node in json_data['nodes']:
        G.add_node(node['id'], title=f"{node['type']}: {node['detailed_type']}")

    # Add edges with labels
    for edge in json_data['edges']:
        G.add_edge(edge['from'], edge['to'], title=edge['label'], label=edge['label'])

    # Create network visualization
    nt = Network(
        width="720px",
        height="600px",
        directed=True,
        notebook=False,
        bgcolor="#f8fafc",
        font_color="#1e293b"
    )

    # Configure network display
    nt.from_nx(G)
    nt.barnes_hut(
        gravity=-3000,
        central_gravity=0.3,
        spring_length=50,
        spring_strength=0.001,
        damping=0.09,
        overlap=0,
    )

    # Customize edge appearance
    for edge in nt.edges:
        edge['width'] = 2
        edge['arrows'] = {'to': {'enabled': True, 'type': 'arrow'}}
        edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
        edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}

    # Customize node appearance
    for node in nt.nodes:
        node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
        node['font'] = {'size': 14, 'color': '#1e293b'}
        node['shape'] = 'dot'
        node['size'] = 25

    # Generate HTML with iframe to isolate styles.
    # The page is HTML-escaped before being placed in the attribute. Swapping
    # every apostrophe for a double quote (the previous approach) corrupted
    # any label containing an apostrophe, e.g. "Tyler's", and broke the
    # embedded script.
    page = nt.generate_html()
    srcdoc = escape(page, quote=True)

    return f"""<iframe style="width: 100%; height: 620px; margin: 0 auto; border-radius: 8px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);"
name="result" allow="midi; geolocation; microphone; camera; display-capture; encrypted-media;"
sandbox="allow-modals allow-forms allow-scripts allow-same-origin allow-popups
allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
allowpaymentrequest="" frameborder="0" srcdoc="{srcdoc}"></iframe>"""
167
+
168
def process_and_visualize(text, model, progress=gr.Progress()):
    """Run the full pipeline: extraction, entity highlighting, graph rendering.

    Args:
        text: Input passage supplied by the user.
        model: Model identifier chosen in the UI.
        progress: Gradio progress tracker, updated at each stage.

    Returns:
        A 4-tuple ``(graph_html, entities_html, json_data, stats)`` matching
        the order of the output components wired to the submit button.

    Raises:
        gr.Error: If *text* or *model* is empty/falsy.
    """
    if not (text and model):
        raise gr.Error("⚠️ Both text and model must be provided.")

    progress(0, desc="Starting extraction...")
    graph_json = extract(text, model)

    progress(0.5, desc="Creating entity visualization...")
    entity_markup = create_custom_entity_viz(graph_json, text)

    progress(0.8, desc="Building knowledge graph...")
    network_markup = create_graph(graph_json)

    # Summary line shown under the input controls.
    node_count = len(graph_json["nodes"])
    edge_count = len(graph_json["edges"])
    stats = f"📊 Extracted {node_count} entities and {edge_count} relationships"

    progress(1.0, desc="Complete!")
    return network_markup, entity_markup, graph_json, stats
187
+
188
# Example texts in different languages
# Each entry is a one-element list (a single value for the input text box).
# handle_text() collapses the line breaks of the triple-quoted literals into
# single spaces. The first two samples are English, the last two Korean.
EXAMPLES = [
    [handle_text("""Legendary rock band Aerosmith has officially announced their retirement from touring after 54 years, citing
lead singer Steven Tyler's unrecoverable vocal cord injury.
The decision comes after months of unsuccessful treatment for Tyler's fractured larynx,
which he suffered in September 2023.""")],

    [handle_text("""Pop star Justin Timberlake, 43, had his driver's license suspended by a New York judge during a virtual
court hearing on August 2, 2024. The suspension follows Timberlake's arrest for driving while intoxicated (DWI)
in Sag Harbor on June 18. Timberlake, who is currently on tour in Europe,
pleaded not guilty to the charges.""")],

    [handle_text("""세계적인 기술 기업 삼성전자는 새로운 인공지능 기반 스마트폰을 올해 하반기에 출시할 예정이라고 발표했다.
이 스마트폰은 현재 개발 중인 갤럭시 시리즈의 최신작으로, 강력한 AI 기능과 혁신적인 카메라 시스템을 탑재할 것으로 알려졌다.
삼성전자의 CEO는 이번 신제품이 스마트폰 시장에 새로운 혁신을 가져올 것이라고 전망했다.""")],

    [handle_text("""한국 영화 '기생충'은 2020년 아카데미 시상식에서 작품상, 감독상, 각본상, 국제영화상 등 4개 부문을 수상하며 역사를 새로 썼다.
봉준호 감독이 연출한 이 영화는 한국 영화 최초로 칸 영화제 황금종려상도 수상했으며, 전 세계적으로 엄청난 흥행과
평단의 호평을 받았다.""")]
]
208
+
209
def create_ui():
    """Assemble the Gradio Blocks interface and wire up its callbacks.

    The page consists of a header, a two-column row (model/text inputs,
    buttons, examples and a stats line on one side; tabbed graph, entity and
    JSON outputs on the other) and a footer.

    NOTE(review): the nesting below is reconstructed from a source whose
    indentation was lost -- confirm component placement against the running
    app.

    Returns:
        The constructed ``gr.Blocks`` object; launching is left to the caller.
    """
    with gr.Blocks(css=CUSTOM_CSS, title=TITLE) as demo:
        # Header
        gr.Markdown(f"# {TITLE}")
        gr.Markdown(f"{SUBTITLE}")

        with gr.Row():
            gr.Markdown("🌍 **Multilingual Support Available** 🔤")

        # Main interface
        with gr.Row():
            # Input column
            with gr.Column(scale=1):
                input_model = gr.Dropdown(
                    MODEL_LIST,
                    label="🤖 Select Model",
                    info="Choose a model to process your text",
                    # Preselect the first model; fall back to None if the list is empty.
                    value=MODEL_LIST[0] if MODEL_LIST else None
                )

                input_text = gr.TextArea(
                    label="📝 Input Text",
                    info="Enter text in any language to extract a knowledge graph",
                    placeholder="Enter text here...",
                    lines=10
                )

                with gr.Row():
                    submit_button = gr.Button("🚀 Extract & Visualize", variant="primary", scale=2)
                    clear_button = gr.Button("🔄 Clear", variant="secondary", scale=1)

                # Clicking an example fills the input text box only.
                gr.Examples(
                    examples=EXAMPLES,
                    inputs=input_text,
                    label="📚 Example Texts (English & Korean)"
                )

                # Receives the "Extracted N entities and M relationships" summary.
                stats_output = gr.Markdown("", label="🔍 Analysis Results")

            # Output column
            with gr.Column(scale=1):
                with gr.Tab("🧩 Knowledge Graph"):
                    output_graph = gr.HTML(label="")

                with gr.Tab("🏷️ Entities"):
                    output_entity_viz = gr.HTML(label="")

                with gr.Tab("📊 JSON Data"):
                    output_json = gr.JSON(label="")

        # Functionality
        # The outputs order must match the tuple returned by process_and_visualize.
        submit_button.click(
            fn=process_and_visualize,
            inputs=[input_text, input_model],
            outputs=[output_graph, output_entity_viz, output_json, stats_output]
        )

        # Resets the four output components; the input text box is not among
        # the outputs and therefore keeps its content.
        clear_button.click(
            fn=lambda: [None, None, None, ""],
            inputs=[],
            outputs=[output_graph, output_entity_viz, output_json, stats_output]
        )

        # Footer
        gr.Markdown("---")
        gr.Markdown("📋 **Instructions:** Enter text in any language, select a model, and click 'Extract & Visualize' to generate a knowledge graph.")
        gr.Markdown("🛠️ Powered by Phi-3 Instruct Graph | Emergent Methods")

    return demo
278
+
279
# Build the interface when the module is executed and start the Gradio server.
# share=False: no public share link is requested.
demo = create_ui()
demo.launch(share=False)