adding doc and reader function
Browse files- app.py +21 -19
- data/{Tennis elbow graded exercise .html → Tennis elbow graded exercise.html} +0 -0
- pyproject.toml +1 -0
- uv.lock +2 -0
app.py
CHANGED
@@ -10,7 +10,6 @@ from langchain_core.messages import HumanMessage
|
|
10 |
from langgraph.graph.message import add_messages
|
11 |
from qdrant_client import QdrantClient
|
12 |
from langchain_openai import OpenAIEmbeddings
|
13 |
-
from langchain_community.document_loaders import DirectoryLoader
|
14 |
|
15 |
import os
|
16 |
|
@@ -58,28 +57,31 @@ def search(query_vector, top_k=1) -> list:
|
|
58 |
|
59 |
return return_hits
|
60 |
|
61 |
-
import os
|
62 |
-
from langchain.document_loaders import DirectoryLoader
|
63 |
-
|
64 |
def get_document_by_name(doc_name: str) -> str:
|
65 |
-
"""Retrieve
|
|
|
|
|
|
|
|
|
66 |
|
67 |
# ✅ Replace `.pdf` with `.html`
|
68 |
html_doc_name = doc_name.replace(".pdf", ".html")
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
83 |
|
84 |
|
85 |
# **User question prompt**
|
|
|
10 |
from langgraph.graph.message import add_messages
|
11 |
from qdrant_client import QdrantClient
|
12 |
from langchain_openai import OpenAIEmbeddings
|
|
|
13 |
|
14 |
import os
|
15 |
|
|
|
57 |
|
58 |
return return_hits
|
59 |
|
|
|
|
|
|
|
60 |
def get_document_by_name(doc_name: str) -> str:
|
61 |
+
"""Retrieve the raw HTML content of a document by its name from the `data/` folder."""
|
62 |
+
|
63 |
+
# ✅ Get the absolute path of the `data/` folder
|
64 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
65 |
+
data_path = os.path.join(script_dir, "data")
|
66 |
|
67 |
# ✅ Replace `.pdf` with `.html`
|
68 |
html_doc_name = doc_name.replace(".pdf", ".html")
|
69 |
+
full_path = os.path.join(data_path, html_doc_name)
|
70 |
+
|
71 |
+
# ✅ Check if the file exists
|
72 |
+
if not os.path.exists(full_path):
|
73 |
+
print(f"⚠️ File not found: {full_path}")
|
74 |
+
return "No file found"
|
75 |
+
|
76 |
+
try:
|
77 |
+
# ✅ Open and read the file content
|
78 |
+
with open(full_path, "r", encoding="utf-8") as file:
|
79 |
+
content = file.read()
|
80 |
+
return content # ✅ Return the raw HTML content
|
81 |
+
|
82 |
+
except Exception as e:
|
83 |
+
print(f"❌ Error reading file {full_path}: {str(e)}")
|
84 |
+
return "Error reading file"
|
85 |
|
86 |
|
87 |
# **User question prompt**
|
data/{Tennis elbow graded exercise .html → Tennis elbow graded exercise.html}
RENAMED
File without changes
|
pyproject.toml
CHANGED
@@ -11,6 +11,7 @@ dependencies = [
|
|
11 |
"langchain-openai>=0.3.7",
|
12 |
"langchain-qdrant>=0.2.0",
|
13 |
"langgraph>=0.2.74",
|
|
|
14 |
"qdrant-client>=1.13.2",
|
15 |
"unstructured>=0.14.8",
|
16 |
"websockets>=15.0",
|
|
|
11 |
"langchain-openai>=0.3.7",
|
12 |
"langchain-qdrant>=0.2.0",
|
13 |
"langgraph>=0.2.74",
|
14 |
+
"nltk>=3.9.1",
|
15 |
"qdrant-client>=1.13.2",
|
16 |
"unstructured>=0.14.8",
|
17 |
"websockets>=15.0",
|
uv.lock
CHANGED
@@ -1159,6 +1159,7 @@ dependencies = [
|
|
1159 |
{ name = "langchain-openai" },
|
1160 |
{ name = "langchain-qdrant" },
|
1161 |
{ name = "langgraph" },
|
|
|
1162 |
{ name = "qdrant-client" },
|
1163 |
{ name = "unstructured" },
|
1164 |
{ name = "websockets" },
|
@@ -1172,6 +1173,7 @@ requires-dist = [
|
|
1172 |
{ name = "langchain-openai", specifier = ">=0.3.7" },
|
1173 |
{ name = "langchain-qdrant", specifier = ">=0.2.0" },
|
1174 |
{ name = "langgraph", specifier = ">=0.2.74" },
|
|
|
1175 |
{ name = "qdrant-client", specifier = ">=1.13.2" },
|
1176 |
{ name = "unstructured", specifier = ">=0.14.8" },
|
1177 |
{ name = "websockets", specifier = ">=15.0" },
|
|
|
1159 |
{ name = "langchain-openai" },
|
1160 |
{ name = "langchain-qdrant" },
|
1161 |
{ name = "langgraph" },
|
1162 |
+
{ name = "nltk" },
|
1163 |
{ name = "qdrant-client" },
|
1164 |
{ name = "unstructured" },
|
1165 |
{ name = "websockets" },
|
|
|
1173 |
{ name = "langchain-openai", specifier = ">=0.3.7" },
|
1174 |
{ name = "langchain-qdrant", specifier = ">=0.2.0" },
|
1175 |
{ name = "langgraph", specifier = ">=0.2.74" },
|
1176 |
+
{ name = "nltk", specifier = ">=3.9.1" },
|
1177 |
{ name = "qdrant-client", specifier = ">=1.13.2" },
|
1178 |
{ name = "unstructured", specifier = ">=0.14.8" },
|
1179 |
{ name = "websockets", specifier = ">=15.0" },
|