Restructuring of the repo

Files changed:

- app.py (+10, -5)
- file_processing.py (+149, new file)
- knowledge.py (+517, new file)
- responses.py (+112, new file)

app.py (CHANGED)

@@ -15,6 +15,11 @@ import matplotlib.pyplot as plt
 from matplotlib.backends.backend_agg import FigureCanvasAgg
 import plotly.graph_objects as go
 import plotly.express as px
+from file_processing import handle_file_upload as fp_handle_file_upload
+from knowledge import show_graph_contents as kb_show_graph_contents
+from knowledge import visualize_knowledge_graph as kb_visualize_knowledge_graph
+from knowledge import import_knowledge_from_json_file as kb_import_json
+from responses import respond as rqa_respond
 
 # ==========================================================
 # 🧠 1. Global Knowledge Graph with Persistent Storage

@@ -2127,7 +2132,7 @@ with gr.Blocks(title="Research Brain") as demo:
         with gr.Column(scale=1):
             gr.Markdown("### Research Assistant")
             chatbot = gr.ChatInterface(
-                fn=lambda message, history:
+                fn=lambda message, history: rqa_respond(message, history),
                 title="Query Knowledge Base",
                 description="Ask questions about your research data. Explore findings, relationships, and insights.",
                 examples=[

@@ -2177,19 +2182,19 @@ with gr.Blocks(title="Research Brain") as demo:
     )
 
     upload_file_button.click(
-        fn=
+        fn=fp_handle_file_upload,
         inputs=file_upload,
         outputs=graph_info
     )
 
     show_button.click(
-        fn=
+        fn=kb_show_graph_contents,
         inputs=None,
         outputs=graph_view
     )
 
     visualize_button.click(
-        fn=
+        fn=kb_visualize_knowledge_graph,
         inputs=None,
         outputs=graph_plot
     )

@@ -2200,7 +2205,7 @@ with gr.Blocks(title="Research Brain") as demo:
     )
 
     import_json_button.click(
-        fn=
+        fn=kb_import_json,
         inputs=json_upload,
         outputs=graph_info
     )
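
A note on the chat wiring: `respond` in responses.py already has the signature `(message, history, system_message=...)`, so the wrapper lambda only forwards its arguments. A minimal equivalent sketch without the lambda, inside the same `gr.Blocks` context, assuming `gr.ChatInterface` invokes `fn(message, history)` for a two-parameter function:

# Sketch: the lambda above only forwards its arguments, so the alias
# can be passed directly; system_message keeps its default value.
chatbot = gr.ChatInterface(
    fn=rqa_respond,
    title="Query Knowledge Base",
    description="Ask questions about your research data. Explore findings, relationships, and insights.",
)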

file_processing.py (ADDED)

@@ -0,0 +1,149 @@

"""Text extraction and file-upload handling for the Research Brain app."""

import os
from datetime import datetime

import pandas as pd
import PyPDF2
from docx import Document

from knowledge import add_to_graph

# Module-level state: the most recently extracted text and a registry of
# processed files (used to skip duplicate uploads).
last_extracted_text = ""
processed_files = []

def extract_text_from_pdf(file_path):
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page in pdf_reader.pages:
                page_text = page.extract_text()
                text += (page_text or "") + "\n"
            return text.strip()
    except Exception as e:
        return f"Error reading PDF: {e}"

def extract_text_from_docx(file_path):
    try:
        doc = Document(file_path)
        text = "".join(p.text + "\n" for p in doc.paragraphs)
        return text.strip()
    except Exception as e:
        return f"Error reading DOCX: {e}"

def extract_text_from_txt(file_path):
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read().strip()
    except Exception as e:
        return f"Error reading TXT: {e}"

def extract_text_from_csv(file_path):
    try:
        df = pd.read_csv(file_path)
        text = f"CSV Data with {len(df)} rows and {len(df.columns)} columns:\n\n"
        text += f"Columns: {', '.join(df.columns)}\n\n"
        text += "Sample data:\n"
        for i, row in df.head(5).iterrows():
            text += f"Row {i+1}: {dict(row)}\n"
        return text.strip()
    except Exception as e:
        return f"Error reading CSV: {e}"

def update_extracted_text(text):
    global last_extracted_text
    last_extracted_text = text

def show_extracted_text():
    if not last_extracted_text:
        return "No file has been processed yet.\n\nUpload a file and process it to see the extracted text here."
    preview = last_extracted_text[:1000]
    if len(last_extracted_text) > 1000:
        preview += "\n\n... (truncated, showing first 1000 characters)"
    return f"**Last Extracted Text:**\n\n{preview}"

def process_uploaded_file(file):
    if file is None:
        return "No file uploaded."
    # Accept both plain path strings and upload objects with a .name
    # attribute, since handle_file_upload may pass either.
    file_path = file.name if hasattr(file, 'name') else str(file)
    file_extension = os.path.splitext(file_path)[1].lower()
    if file_extension == '.pdf':
        extracted_text = extract_text_from_pdf(file_path)
    elif file_extension == '.docx':
        extracted_text = extract_text_from_docx(file_path)
    elif file_extension == '.txt':
        extracted_text = extract_text_from_txt(file_path)
    elif file_extension == '.csv':
        extracted_text = extract_text_from_csv(file_path)
    else:
        return f"Unsupported file type: {file_extension}\n\nSupported formats: PDF, DOCX, TXT, CSV"
    if extracted_text.startswith("Error"):
        return extracted_text
    update_extracted_text(extracted_text)
    preview = extracted_text[:300] + "..." if len(extracted_text) > 300 else extracted_text
    result = add_to_graph(extracted_text)
    file_size = len(extracted_text)
    return (f"Successfully processed {os.path.basename(file_path)}!\n\n"
            f"📊 File stats:\n• Size: {file_size:,} characters\n• Type: {file_extension.upper()}\n\n"
            f"Text preview:\n{preview}\n\n{result}")

def handle_file_upload(files):
    global processed_files
    if not files or len(files) == 0:
        return "Please select at least one file to process."
    results = []
    new_processed = []
    for file in files:
        if file is None:
            continue
        try:
            if isinstance(file, str):
                file_path = file
                file_name = os.path.basename(file)
            else:
                file_path = file.name
                file_name = os.path.basename(file.name)
            # Skip files already in the registry.
            if any(f['name'] == file_name for f in processed_files):
                results.append(f"SKIP: {file_name} - Already processed, skipping")
                continue
            result = process_uploaded_file(file)
            results.append(f"SUCCESS: {file_name} - {result}")
            new_processed.append({
                'name': file_name,
                'size': os.path.getsize(file_path) if os.path.exists(file_path) else 0,
                'processed_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            })
        except Exception as e:
            file_name = os.path.basename(file) if isinstance(file, str) else (
                os.path.basename(file.name) if hasattr(file, 'name') else str(file))
            results.append(f"ERROR: {file_name} - Error: {e}")
    processed_files.extend(new_processed)
    total_files = len(files)
    successful = len([r for r in results if r.startswith("SUCCESS")])
    skipped = len([r for r in results if r.startswith("SKIP")])
    failed = len([r for r in results if r.startswith("ERROR")])
    summary = "**Upload Summary:**\n"
    summary += f"• Total files: {total_files}\n"
    summary += f"• Successfully processed: {successful}\n"
    summary += f"• Already processed: {skipped}\n"
    summary += f"• Failed: {failed}\n\n"
    summary += "**File Results:**\n"
    for result in results:
        summary += f"{result}\n"
    return summary

def show_processed_files():
    if not processed_files:
        return ("**No files processed yet.**\n\n**Start building your knowledge base:**\n"
                "1. Select one or more files (PDF, DOCX, TXT, CSV)\n"
                "2. Click 'Process Files' to extract knowledge\n"
                "3. View your processed files here\n"
                "4. Upload more files to expand your knowledge base!")
    result = f"**Processed Files ({len(processed_files)}):**\n\n"
    for i, file_info in enumerate(processed_files, 1):
        result += f"**{i}. {file_info['name']}**\n"
        result += f" • Size: {file_info['size']:,} bytes\n"
        result += f" • Processed: {file_info['processed_at']}\n\n"
    return result

def clear_processed_files():
    global processed_files
    processed_files = []
    return "Processed files list cleared. You can now re-upload previously processed files."

def simple_test():
    return "Event handler is working! Button clicked successfully!"
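
For a quick smoke test outside Gradio, a minimal sketch of driving the upload handler directly; `handle_file_upload` accepts plain path strings as well as upload objects with a `.name` attribute (the file names below are hypothetical):

# Smoke test for file_processing (hypothetical local paths).
from file_processing import handle_file_upload, show_processed_files

print(handle_file_upload(["notes.txt", "report.pdf"]))  # per-file SUCCESS/SKIP/ERROR lines plus counts
print(show_processed_files())  # the registry consulted to skip duplicates on the next upload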

knowledge.py (ADDED)

@@ -0,0 +1,517 @@

"""Persistent RDF knowledge graph for Research Brain: storage, triple
extraction, retrieval, visualization, and editing helpers."""

import os
import json
import pickle
import re
from datetime import datetime

import networkx as nx
import rdflib

# Storage file paths
KNOWLEDGE_FILE = "knowledge_graph.pkl"
BACKUP_FILE = "knowledge_backup.json"

# Global RDF graph
graph = rdflib.Graph()

# Mapping of fact IDs to triples for editing operations
fact_index = {}

def save_knowledge_graph():
    """Pickle the graph and write a human-readable JSON backup alongside it."""
    try:
        with open(KNOWLEDGE_FILE, 'wb') as f:
            pickle.dump(graph, f)
        backup_data = {
            "timestamp": datetime.now().isoformat(),
            "total_facts": len(graph),
            "facts": []
        }
        for i, (s, p, o) in enumerate(graph):
            backup_data["facts"].append({
                "id": i + 1,
                "subject": str(s),
                "predicate": str(p),
                "object": str(o)
            })
        with open(BACKUP_FILE, 'w', encoding='utf-8') as f:
            json.dump(backup_data, f, indent=2, ensure_ascii=False)
        return f"Saved {len(graph)} facts to storage"
    except Exception as e:
        return f"Error saving knowledge: {e}"

def load_knowledge_graph():
    global graph
    try:
        if os.path.exists(KNOWLEDGE_FILE):
            with open(KNOWLEDGE_FILE, 'rb') as f:
                graph = pickle.load(f)
            return f"📂 Loaded {len(graph)} facts from storage"
        else:
            return "📂 No existing knowledge file found, starting fresh"
    except Exception as e:
        return f"Error loading knowledge: {e}"

def create_comprehensive_backup():
    try:
        backup_data = {
            "metadata": {
                "timestamp": datetime.now().isoformat(),
                "total_facts": len(graph),
                "backup_type": "comprehensive_knowledge_base",
                "graph_size": len(graph)
            },
            "facts": []
        }
        for i, (s, p, o) in enumerate(graph):
            # Short forms drop the URI scheme prefix; full forms keep it.
            subject = str(s).split(':')[-1] if ':' in str(s) else str(s)
            predicate = str(p).split(':')[-1] if ':' in str(p) else str(p)
            object_val = str(o)
            backup_data["facts"].append({
                "id": i + 1,
                "subject": subject,
                "predicate": predicate,
                "object": object_val,
                "full_subject": str(s),
                "full_predicate": str(p),
                "full_object": str(o)
            })
        with open(BACKUP_FILE, 'w', encoding='utf-8') as f:
            json.dump(backup_data, f, indent=2, ensure_ascii=False)
    except Exception as e:
        create_error_backup(str(e))

def create_error_backup(error_message):
    try:
        backup_data = {
            "metadata": {
                "timestamp": datetime.now().isoformat(),
                "total_facts": 0,
                "backup_type": "error_backup",
                "error": error_message
            },
            "facts": []
        }
        with open(BACKUP_FILE, 'w', encoding='utf-8') as f:
            json.dump(backup_data, f, indent=2, ensure_ascii=False)
    except Exception:
        pass

def extract_entities(text):
    """Heuristic entity extraction: capitalized names, organizations,
    locations after prepositions, and dates."""
    entities = []
    capitalized_words = re.findall(r'\b[A-Z][a-z]+\s+[A-Z][a-z]+(?:\s+[A-Z][a-z]+)?\b', text)
    entities.extend(capitalized_words)
    org_patterns = [
        r'([A-Z][a-zA-Z\s]+)\s+(Inc|Ltd|LLC|Corp|Corporation|Company|Co\.|Ltd\.)',
        r'([A-Z][a-zA-Z\s]+)\s+(University|Institute|Lab|Laboratory)',
    ]
    for pattern in org_patterns:
        matches = re.findall(pattern, text)
        entities.extend([m[0].strip() for m in matches])
    location_keywords = ['in ', 'at ', 'near ', 'from ']
    for keyword in location_keywords:
        pattern = rf'{keyword}([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)'
        matches = re.findall(pattern, text)
        entities.extend(matches)
    dates = re.findall(r'\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b|\b\d{4}\b', text)
    entities.extend(dates)
    entities = list(set([e.strip() for e in entities if len(e.strip()) > 3]))
    return entities[:50]

def extract_regular_triples_improved(text, entities):
    """Extract (subject, predicate, object) triples using verb-specific patterns."""
    triples = []
    sentences = re.split(r'[.!?\n]+', text)
    for sentence in sentences:
        sentence = sentence.strip()
        if len(sentence) < 15:
            continue
        improved_patterns = [
            (r'([A-Z][a-zA-Z\s]+(?:,\s+[A-Z][a-zA-Z\s]+)*)\s+(is|are|was|were|becomes|represents|means|refers to|denotes)\s+(.+)', 'relates to'),
            (r'([A-Z][a-zA-Z\s]+)\s+(uses|employs|utilizes|applies)\s+(.+)', 'uses'),
            (r'([A-Z][a-zA-Z\s]+)\s+(develops|created|designed|implemented)\s+(.+)', 'creates'),
            (r'([A-Z][a-zA-Z\s]+)\s+(requires|needs|demands)\s+(.+)', 'requires'),
            (r'([A-Z][a-zA-Z\s]+)\s+(enables|allows|permits)\s+(.+)', 'enables'),
            (r'([A-Z][a-zA-Z\s]+)\s+(affects|impacts|influences)\s+(.+)', 'affects'),
            (r'([A-Z][a-zA-Z\s]+)\s+(found|discovered|identified|observed|detected)\s+(.+)', 'discovered'),
            (r'([A-Z][a-zA-Z\s]+)\s+(studies|analyzes|examines|investigates)\s+(.+)', 'studies'),
            (r'([A-Z][a-zA-Z\s]+)\s+(proposes|suggests|recommends)\s+(.+)', 'proposes'),
            (r'([A-Z][a-zA-Z\s]+)\s+(results in|leads to|causes)\s+(.+)', 'causes'),
            (r'([A-Z][a-zA-Z\s]+)\s+(works with|collaborates with|partnered with)\s+(.+)', 'works with'),
            (r'([A-Z][a-zA-Z\s]+)\s+(located in|based in|situated in)\s+(.+)', 'located in'),
        ]
        for pattern, predicate in improved_patterns:
            match = re.search(pattern, sentence, re.IGNORECASE)
            if match:
                groups = match.groups()
                subject = groups[0].strip() if len(groups) > 0 else ''
                object_val = groups[-1].strip() if len(groups) > 1 else ''
                # Drop leading articles from both ends of the triple.
                subject = re.sub(r'^(the|a|an)\s+', '', subject, flags=re.IGNORECASE).strip()
                object_val = re.sub(r'^(the|a|an)\s+', '', object_val, flags=re.IGNORECASE).strip()
                if subject and object_val and len(subject) > 3 and len(object_val) > 3:
                    triples.append((subject, predicate, object_val))
                break
        clause_patterns = [
            r'([A-Z][a-zA-Z\s]+)\s+which\s+(.+)',
            r'([A-Z][a-zA-Z\s]+)\s+that\s+(.+)',
            r'([A-Z][a-zA-Z\s]+)\s+who\s+(.+)',
        ]
        for pattern in clause_patterns:
            match = re.search(pattern, sentence)
            if match:
                subject = match.group(1).strip()
                description = match.group(2).strip()
                if subject and description and len(subject) > 3 and len(description) > 3:
                    triples.append((subject, 'has property', description[:150]))
    return triples

def extract_structured_triples(text):
    """Extract key/value facts from structured lines (forms, invoices, exports)."""
    triples = []
    lines = text.split('\n')
    patterns = [
        (r'date\s*:?\s*([0-9\/\-\.]+)', 'date', 'is'),
        (r'time\s*:?\s*([0-9:]+)', 'time', 'is'),
        (r'created\s*:?\s*([0-9\/\-\.]+)', 'created_date', 'is'),
        (r'modified\s*:?\s*([0-9\/\-\.]+)', 'modified_date', 'is'),
        (r'id\s*:?\s*([A-Z0-9\-]+)', 'id', 'is'),
        (r'number\s*:?\s*([A-Z0-9\-]+)', 'number', 'is'),
        (r'code\s*:?\s*([A-Z0-9\-]+)', 'code', 'is'),
        (r'reference\s*:?\s*([A-Z0-9\-]+)', 'reference', 'is'),
        (r'name\s*:?\s*([A-Za-z\s&.,]+)', 'name', 'is'),
        (r'title\s*:?\s*([A-Za-z\s&.,]+)', 'title', 'is'),
        (r'company\s*:?\s*([A-Za-z\s&.,]+)', 'company', 'is'),
        (r'organization\s*:?\s*([A-Za-z\s&.,]+)', 'organization', 'is'),
        (r'email\s*:?\s*([A-Za-z0-9@\.\-]+)', 'email', 'is'),
        (r'phone\s*:?\s*([0-9\s\-\+\(\)]+)', 'phone', 'is'),
        (r'address\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'address', 'is'),
        (r'description\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'description', 'is'),
        (r'type\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'type', 'is'),
        (r'category\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'category', 'is'),
        (r'status\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'status', 'is'),
        (r'location\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'location', 'is'),
        (r'department\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'department', 'is'),
        (r'section\s*:?\s*([A-Za-z0-9\s\-\.,]+)', 'section', 'is'),
        (r'amount\s*:?\s*\$?([0-9,]+\.?[0-9]*)', 'amount', 'is'),
        (r'total\s*:?\s*\$?([0-9,]+\.?[0-9]*)', 'total', 'is'),
        (r'price\s*:?\s*\$?([0-9,]+\.?[0-9]*)', 'price', 'is'),
        (r'cost\s*:?\s*\$?([0-9,]+\.?[0-9]*)', 'cost', 'is'),
    ]
    for line in lines:
        line = line.strip()
        if len(line) < 5:
            continue
        for pattern, subject, predicate in patterns:
            match = re.search(pattern, line, re.IGNORECASE)
            if match:
                value = match.group(1).strip()
                if value and len(value) > 1:
                    triples.append((subject, predicate, value))
                break
    kv_patterns = [
        r'([A-Za-z\s]+):\s*([A-Za-z0-9\s\$\-\.\/,]+)',
        r'([A-Za-z\s]+)\s*=\s*([A-Za-z0-9\s\$\-\.\/,]+)',
        r'([A-Za-z\s]+)\s*-\s*([A-Za-z0-9\s\$\-\.\/,]+)',
    ]
    for line in lines:
        for pattern in kv_patterns:
            match = re.search(pattern, line)
            if match:
                key = match.group(1).strip().lower().replace(' ', '_')
                value = match.group(2).strip()
                if len(key) > 2 and len(value) > 1:
                    triples.append((key, 'is', value))
    return triples

def extract_regular_triples(text):
    """Fallback extraction: split sentences on common relational verbs."""
    triples = []
    sentences = re.split(r"[.?!\n]", text)
    patterns = [
        r"\s+(is|are|was|were)\s+",
        r"\s+(has|have|had)\s+",
        r"\s+(uses|used|using)\s+",
        r"\s+(creates|created|creating)\s+",
        r"\s+(develops|developed|developing)\s+",
        r"\s+(leads|led|leading)\s+",
        r"\s+(affects|affected|affecting)\s+",
        r"\s+(contains|contained|containing)\s+",
        r"\s+(includes|included|including)\s+",
        r"\s+(requires|required|requiring)\s+",
        r"\s+(causes|caused|causing)\s+",
        r"\s+(results|resulted|resulting)\s+",
        r"\s+(enables|enabled|enabling)\s+",
        r"\s+(provides|provided|providing)\s+",
        r"\s+(supports|supported|supporting)\s+",
        r"\s+(located|situated|found)\s+",
        r"\s+(connects|links|relates)\s+",
        r"\s+(depends|relies|based)\s+",
        r"\s+(represents|symbolizes|stands)\s+",
        r"\s+(describes|explains|defines)\s+",
        r"\s+(refers|referring|referenced)\s+",
        r"\s+(concerns|concerning|concerned)\s+",
        r"\s+(relates|relating|related)\s+",
    ]
    for sentence in sentences:
        sentence = sentence.strip()
        if len(sentence) < 10:
            continue
        for pattern in patterns:
            # With one capture group, a successful split yields
            # [subject, predicate, object].
            parts = re.split(pattern, sentence, maxsplit=1)
            if len(parts) == 3:
                subj, pred, obj = parts
                subj = re.sub(r'^(the|a|an)\s+', '', subj.strip(), flags=re.IGNORECASE)
                obj = re.sub(r'^(the|a|an)\s+', '', obj.strip(), flags=re.IGNORECASE)
                if subj and pred and obj and len(subj) > 2 and len(obj) > 2:
                    triples.append((subj, pred.strip(), obj))
                break
    return triples

def extract_triples(text):
    """Run all extractors, then normalize and de-duplicate the triples."""
    triples = []
    entities = extract_entities(text)
    for entity in entities:
        triples.append((entity, 'type', 'entity'))
    triples.extend(extract_structured_triples(text))
    triples.extend(extract_regular_triples_improved(text, entities))
    triples.extend(extract_regular_triples(text))
    unique_triples = []
    for s, p, o in triples:
        if s and p and o and len(s) > 2 and len(p) > 1 and len(o) > 2:
            s = s.strip()[:100]
            p = p.strip()[:50]
            o = o.strip()[:200]
            if (s, p, o) not in unique_triples:
                unique_triples.append((s, p, o))
    return unique_triples

def add_to_graph(text):
    new_triples = extract_triples(text)
    for s, p, o in new_triples:
        graph.add((rdflib.URIRef(f"urn:{s}"), rdflib.URIRef(f"urn:{p}"), rdflib.Literal(o)))
    save_knowledge_graph()
    return f"Added {len(new_triples)} new triples. Total facts stored: {len(graph)}.\nSaved"

def retrieve_context(question, limit=10):
    """Score each stored fact by overlap with the question's content words
    and return the top matches."""
    qwords = [w for w in question.lower().split() if w not in {
        'the','a','an','and','or','but','in','on','at','to','for','of','with','by','is','are','was','were','be','been','have','has','had','do','does','did','will','would','could','should','may','might','can','what','how','when','where','why','who'
    } and len(w) > 2]
    scored_matches = []
    for s, p, o in graph:
        subject = str(s).split(':')[-1] if ':' in str(s) else str(s)
        predicate = str(p).split(':')[-1] if ':' in str(p) else str(p)
        object_val = str(o)
        fact_text = f"{subject} {predicate} {object_val}".lower()
        score = 0
        for word in qwords:
            if word in fact_text:
                score += 1
                # Exact subject/predicate hits weigh more than substring hits.
                if word == subject.lower() or word == predicate.lower():
                    score += 2
        if score > 0:
            scored_matches.append((score, f"{subject} {predicate} {object_val}"))
    scored_matches.sort(key=lambda x: x[0], reverse=True)
    matches = [m[1] for m in scored_matches[:limit]]
    if matches:
        result = "**Relevant Knowledge:**\n"
        for i, match in enumerate(matches, 1):
            result += f"{i}. {match}\n"
        return result
    return "**No directly relevant facts found.**\n\nTry asking about topics that might be in your knowledge base, or add more knowledge first!"

def show_graph_contents():
    if len(graph) == 0:
        return ("**Knowledge Graph Status: EMPTY**\n\n"
                "**How to build your knowledge base:**\n"
                "1. **Add text directly** - Paste any text in the 'Add Knowledge from Text' box above\n"
                "2. **Upload documents** - Use the file upload to process PDF, DOCX, TXT, CSV files\n"
                "3. **Extract facts** - The system will automatically extract knowledge from your content\n"
                "4. **Build knowledge** - Add more text or files to expand your knowledge base\n"
                "5. **Save knowledge** - Use 'Save Knowledge' to persist your data\n\n"
                "**Start by adding some text or uploading a document!**")
    facts_by_subject = {}
    all_facts = []
    for s, p, o in graph:
        subject = str(s).split(':')[-1] if ':' in str(s) else str(s)
        predicate = str(p).split(':')[-1] if ':' in str(p) else str(p)
        object_val = str(o)
        fact_text = f"{subject} {predicate} {object_val}"
        all_facts.append(fact_text)
        facts_by_subject.setdefault(subject, []).append(f"{predicate} {object_val}")
    result = "**Knowledge Graph Overview**\n"
    result += f"**Total Facts:** {len(graph)}\n"
    result += f"**Unique Subjects:** {len(facts_by_subject)}\n\n"
    result += "## **Knowledge by Subject:**\n\n"
    for i, (subject, facts) in enumerate(facts_by_subject.items()):
        if i >= 10:
            remaining = len(facts_by_subject) - 10
            result += f"... and {remaining} more subjects\n"
            break
        result += f"**{subject}:**\n"
        for fact in facts:
            result += f" • {fact}\n"
        result += "\n"
    result += "## **All Facts:**\n\n"
    for i, fact in enumerate(all_facts[:20]):
        result += f"{i+1}. {fact}\n"
    if len(all_facts) > 20:
        result += f"\n... and {len(all_facts) - 20} more facts"
    return result

def visualize_knowledge_graph():
    """Render the graph as inline SVG: subjects and objects as nodes,
    predicates as edge tooltips."""
    if len(graph) == 0:
        return "<p>No knowledge in graph. Add some text or upload a document first!</p>"
    try:
        G = nx.Graph()
        fact_data = {}
        for s, p, o in graph:
            subject = str(s).split(':')[-1] if ':' in str(s) else str(s)
            predicate = str(p).split(':')[-1] if ':' in str(p) else str(p)
            object_val = str(o)
            subject_short = (subject[:30] + "...") if len(subject) > 30 else subject
            object_short = (object_val[:30] + "...") if len(object_val) > 30 else object_val
            if subject not in G:
                G.add_node(subject, display=subject_short, node_type='subject')
            if object_val not in G:
                G.add_node(object_val, display=object_short, node_type='object')
            G.add_edge(subject, object_val, label=predicate)
            fact_data[(subject, object_val)] = f"{subject} {predicate} {object_val}"
        pos = nx.spring_layout(G, k=2, iterations=100, seed=42)
        # Scale layout coordinates into the SVG viewport.
        x_positions = [pos[n][0] for n in G.nodes()]
        y_positions = [pos[n][1] for n in G.nodes()]
        x_min, x_max = min(x_positions), max(x_positions)
        y_min, y_max = min(y_positions), max(y_positions)
        scale = min(500 / (x_max - x_min), 400 / (y_max - y_min)) if (x_max - x_min) > 0 and (y_max - y_min) > 0 else 50
        offset_x = 350
        offset_y = 300
        svg_elements = []
        for edge in G.edges():
            x1 = pos[edge[0]][0] * scale + offset_x
            y1 = pos[edge[0]][1] * scale + offset_y
            x2 = pos[edge[1]][0] * scale + offset_x
            y2 = pos[edge[1]][1] * scale + offset_y
            edge_data = G[edge[0]][edge[1]]
            label = edge_data.get('label', 'has')
            svg_elements.append(f"""
            <line x1="{x1}" y1="{y1}" x2="{x2}" y2="{y2}"
                  stroke="#999" stroke-width="2" opacity="0.5">
                <title>{label}</title>
            </line>
            """)
        node_info = []
        for i, node in enumerate(G.nodes()):
            x = pos[node][0] * scale + offset_x
            y = pos[node][1] * scale + offset_y
            display_name = G.nodes[node].get('display', node)
            node_type = G.nodes[node].get('node_type', 'unknown')
            color = '#4CAF50' if node_type == 'subject' else ('#2196F3' if node_type == 'object' else '#546E7A')
            neighbors = list(G.neighbors(node))
            neighbor_count = len(neighbors)
            # Radius grows with the connection count, clamped to [20, 40].
            radius = min(40, max(20, neighbor_count * 2 + 20))
            node_info.append(f"""
            <circle cx="{x}" cy="{y}" r="{radius}"
                    fill="{color}" stroke="#fff" stroke-width="2">
                <title>{display_name} ({neighbor_count} connections)</title>
            </circle>
            <text x="{x}" y="{y+6}" text-anchor="middle" font-size="15" font-weight="bold" fill="#000"
                  pointer-events="none">{display_name[:15]}</text>
            """)
        svg_content = '\n'.join(svg_elements + node_info)
        html = f"""
        <div style="width: 100%; min-height: 700px; max-height: 800px; background: white; border: 2px solid #ddd; border-radius: 10px; padding: 20px; position: relative; overflow: auto;">
            <svg width="100%" height="550" style="border: 1px solid #ddd; border-radius: 5px; background: #f9f9f9; display: block;">
                {svg_content}
            </svg>
        </div>
        """
        return html
    except Exception as e:
        return f"<p style='color: red; padding: 20px;'>Error creating visualization: {e}</p>"

def delete_all_knowledge():
    global graph
    count = len(graph)
    graph = rdflib.Graph()
    save_knowledge_graph()
    return f"🗑️ Deleted all {count} facts from the knowledge graph. Graph is now empty."

def delete_knowledge_by_keyword(keyword):
    global graph
    if not keyword or keyword.strip() == "":
        return "⚠️ Please enter a keyword to search for."
    keyword = keyword.strip().lower()
    deleted_count = 0
    facts_to_remove = []
    for s, p, o in graph:
        fact_text = f"{s} {p} {o}".lower()
        if keyword in fact_text:
            facts_to_remove.append((s, p, o))
    for fact in facts_to_remove:
        graph.remove(fact)
        deleted_count += 1
    if deleted_count > 0:
        save_knowledge_graph()
        return f"🗑️ Deleted {deleted_count} facts containing '{keyword}'"
    else:
        return f"ℹ️ No facts found containing '{keyword}'"

def delete_recent_knowledge(count=5):
    global graph
    if len(graph) == 0:
        return "ℹ️ Knowledge graph is already empty."
    facts = list(graph)
    # Note: rdflib iteration order is not guaranteed to be insertion order,
    # so "recent" is best-effort here.
    facts_to_remove = facts[-count:] if count < len(facts) else facts
    for fact in facts_to_remove:
        graph.remove(fact)
    save_knowledge_graph()
    return f"🗑️ Deleted {len(facts_to_remove)} most recent facts"

def list_facts_for_editing():
    """Rebuild fact_index and return display labels for a dropdown."""
    global fact_index
    fact_index = {}
    options = []
    for i, (s, p, o) in enumerate(list(graph), start=1):
        subject = str(s).split(':')[-1] if ':' in str(s) else str(s)
        predicate = str(p).split(':')[-1] if ':' in str(p) else str(p)
        object_val = str(o)
        label = f"{i}. {subject} {predicate} {object_val}"
        options.append(label)
        fact_index[i] = (s, p, o)
    return options

def load_fact_by_label(fact_label):
    if not fact_label:
        return None
    try:
        fact_id = int(fact_label.split('.', 1)[0].strip())
        return fact_index.get(fact_id)
    except Exception:
        return None

def import_knowledge_from_json_file(file):
    """Import facts from a JSON backup: either {"facts": [...]} or a bare list."""
    try:
        if file is None:
            return "⚠️ No file selected."
        file_path = file.name if hasattr(file, 'name') else str(file)
        if not os.path.exists(file_path):
            return f"⚠️ File not found: {file_path}"
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if isinstance(data, dict) and 'facts' in data:
            facts = data['facts']
        elif isinstance(data, list):
            facts = data
        else:
            return "❌ Unsupported JSON structure. Expect an object with 'facts' or a list of facts."
        added = 0
        skipped = 0
        for fact in facts:
            try:
                subject = fact.get('subject') or fact.get('full_subject')
                predicate = fact.get('predicate') or fact.get('full_predicate')
                obj = fact.get('object') or fact.get('full_object')
                if not subject or not predicate or obj is None:
                    skipped += 1
                    continue
                s_ref = rdflib.URIRef(subject if str(subject).startswith('urn:') else f"urn:{subject}")
                p_ref = rdflib.URIRef(predicate if str(predicate).startswith('urn:') else f"urn:{predicate}")
                o_lit = rdflib.Literal(obj)
                graph.add((s_ref, p_ref, o_lit))
                added += 1
            except Exception:
                skipped += 1
        save_knowledge_graph()
        return f"✅ Imported {added} facts. Skipped {skipped}. Total facts: {len(graph)}."
    except Exception as e:
        return f"❌ Import failed: {e}"
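
To see the pipeline end to end, a small sketch (the input text is made up): `add_to_graph` runs `extract_triples`, stores each triple as `(URIRef("urn:subject"), URIRef("urn:predicate"), Literal(object))`, and pickles the graph; `retrieve_context` then ranks stored facts by word overlap with the question:

# Round-trip sketch for knowledge.py (illustrative input text).
from knowledge import add_to_graph, retrieve_context, load_knowledge_graph

print(load_knowledge_graph())  # picks up knowledge_graph.pkl if it exists
print(add_to_graph("Acme Corp develops graph databases. Acme Corp is based in Berlin."))
print(retrieve_context("Where is Acme Corp based?"))  # top-scored matching facts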

responses.py (ADDED)

@@ -0,0 +1,112 @@

"""Rule-based answer generation over facts retrieved from the knowledge graph."""

from knowledge import retrieve_context

def generate_document_summary(context):
    if not context or "No directly relevant facts found" in context:
        return "I don't have enough information about this document to provide a summary. Please add more knowledge to the knowledge base first."
    facts = []
    for line in context.split('\n'):
        if line.strip() and not line.startswith('**'):
            facts.append(line.strip())
    document_type = "document"
    key_info = []
    for fact in facts:
        fact_lower = fact.lower()
        if 'invoice' in fact_lower or 'bill' in fact_lower:
            document_type = "invoice"
        elif 'contract' in fact_lower or 'agreement' in fact_lower:
            document_type = "contract"
        elif 'report' in fact_lower or 'analysis' in fact_lower:
            document_type = "report"
        elif any(k in fact_lower for k in ['company','organization','name','amount','total','cost','price','date','time','address','location','description','type','id','number','code']):
            key_info.append(fact)
    summary = f"Based on the information in my knowledge base, this appears to be a **{document_type}** document. "
    if key_info:
        summary += "Here are the key details I found:\n\n"
        for info in key_info[:5]:
            summary += f"• {info}\n"
    else:
        summary += "However, I don't have enough specific details to provide a comprehensive summary."
    return summary

def _facts_from_context(context):
    """Strip headers from retrieve_context output, keeping only fact lines."""
    facts = []
    for line in context.split('\n'):
        if line.strip() and not line.startswith('**'):
            facts.append(line.strip())
    return facts

def generate_what_response(message, context):
    facts = _facts_from_context(context)
    if not facts:
        return "I don't have specific information about that in my knowledge base."
    response = "Based on my knowledge base, here's what I can tell you:\n\n"
    for fact in facts[:3]:
        response += f"• {fact}\n"
    if len(facts) > 3:
        response += f"\nI have {len(facts)} total facts about this topic in my knowledge base."
    return response

def generate_who_response(message, context):
    facts = _facts_from_context(context)
    # Keyword lists include Greek terms so Greek-language documents
    # (e.g. invoices) are matched as well.
    facts = [f for f in facts if any(k in f.lower() for k in ['company','name','person','επωνυμία','εταιρεία'])]
    if not facts:
        return "I don't have specific information about people or companies in my knowledge base."
    return "Here's what I know about people/entities:\n\n" + "\n".join(f"• {f}" for f in facts)

def generate_when_response(message, context):
    facts = _facts_from_context(context)
    facts = [f for f in facts if any(k in f.lower() for k in ['date','ημερομηνία','due','προθεσμία'])]
    if not facts:
        return "I don't have specific date information in my knowledge base."
    return "Here's the date information I have:\n\n" + "\n".join(f"• {f}" for f in facts)

def generate_where_response(message, context):
    facts = _facts_from_context(context)
    facts = [f for f in facts if any(k in f.lower() for k in ['address','διεύθυνση','location','place'])]
    if not facts:
        return "I don't have specific location information in my knowledge base."
    return "Here's the location information I have:\n\n" + "\n".join(f"• {f}" for f in facts)

def generate_amount_response(message, context):
    facts = _facts_from_context(context)
    facts = [f for f in facts if any(k in f.lower() for k in ['amount','total','price','cost','σύνολο','φόρος','€','$'])]
    if not facts:
        return "I don't have specific financial information in my knowledge base."
    return "Here's the financial information I have:\n\n" + "\n".join(f"• {f}" for f in facts)

def generate_general_response(message, context):
    facts = _facts_from_context(context)
    if not facts:
        return "I don't have relevant information about that in my knowledge base."
    response = "Based on my knowledge base, here's what I can tell you:\n\n"
    for fact in facts[:4]:
        response += f"• {fact}\n"
    if len(facts) > 4:
        response += f"\nI have {len(facts)} total relevant facts about this topic."
    return response

def generate_intelligent_response(message, context, system_message):
    """Route the question to a type-specific responder. system_message is
    accepted for interface compatibility but unused by the rule-based branches."""
    message_lower = message.lower()
    if any(phrase in message_lower for phrase in [
        'what is the document about', 'whats the document about', 'what is this about',
        'whats this about', 'describe the document', 'summarize the document',
        'what does this contain'
    ]):
        return generate_document_summary(context)
    elif message_lower.startswith('what'):
        return generate_what_response(message, context)
    elif message_lower.startswith('who'):
        return generate_who_response(message, context)
    elif message_lower.startswith('when'):
        return generate_when_response(message, context)
    elif message_lower.startswith('where'):
        return generate_where_response(message, context)
    elif any(phrase in message_lower for phrase in ['how much', 'amount', 'total', 'cost', 'price']):
        return generate_amount_response(message, context)
    else:
        return generate_general_response(message, context)

def respond(message, history, system_message="You are an intelligent assistant that answers questions based on factual information from a knowledge base. You provide clear, accurate, and helpful responses. When you have relevant information, you share it directly. When you don't have enough information, you clearly state this limitation. You always stay grounded in the facts provided and never hallucinate information."):
    context = retrieve_context(message)
    return generate_intelligent_response(message, context, system_message)
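
Finally, a sketch of the chat entry point itself; `respond` retrieves context and routes by question type, so a "how much" question lands in `generate_amount_response` (the question below is illustrative, and `history` is unused beyond matching the ChatInterface signature):

# End-to-end sketch for responses.py (illustrative question).
from responses import respond

print(respond("How much is the total on the invoice?", history=[]))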