shivXy committed on
Commit
480bba3
·
1 Parent(s): 9e78dc8

adding doc and reader function

Browse files
app.py CHANGED
@@ -10,7 +10,6 @@ from langchain_core.messages import HumanMessage
10
  from langgraph.graph.message import add_messages
11
  from qdrant_client import QdrantClient
12
  from langchain_openai import OpenAIEmbeddings
13
- from langchain_community.document_loaders import DirectoryLoader
14
 
15
  import os
16
 
@@ -58,28 +57,31 @@ def search(query_vector, top_k=1) -> list:
58
 
59
  return return_hits
60
 
61
- import os
62
- from langchain.document_loaders import DirectoryLoader
63
-
64
  def get_document_by_name(doc_name: str) -> str:
65
- """Retrieve a document from the list of loaded documents by its name from QD."""
 
 
 
 
66
 
67
  # ✅ Replace `.pdf` with `.html`
68
  html_doc_name = doc_name.replace(".pdf", ".html")
69
-
70
- # ✅ Define the directory path
71
- path = "data/"
72
-
73
- # ✅ Load only `.html` files
74
- loader = DirectoryLoader(path, glob="*.html")
75
- docs = loader.load()
76
-
77
- # ✅ Search for the document by name
78
- for doc in docs:
79
- if os.path.basename(doc.metadata.get("source", "")) == html_doc_name:
80
- return doc.page_content # ✅ Return the document content
81
-
82
- return "No file found" # ✅ Return this if no matching file is found
 
 
83
 
84
 
85
  # **User question prompt**
 
10
  from langgraph.graph.message import add_messages
11
  from qdrant_client import QdrantClient
12
  from langchain_openai import OpenAIEmbeddings
 
13
 
14
  import os
15
 
 
57
 
58
  return return_hits
59
 
 
 
 
60
  def get_document_by_name(doc_name: str) -> str:
61
+ """Retrieve the raw HTML content of a document by its name from the `data/` folder."""
62
+
63
+ # ✅ Get the absolute path of the `data/` folder
64
+ script_dir = os.path.dirname(os.path.abspath(__file__))
65
+ data_path = os.path.join(script_dir, "data")
66
 
67
  # ✅ Replace `.pdf` with `.html`
68
  html_doc_name = doc_name.replace(".pdf", ".html")
69
+ full_path = os.path.join(data_path, html_doc_name)
70
+
71
+ # ✅ Check if the file exists
72
+ if not os.path.exists(full_path):
73
+ print(f"⚠️ File not found: {full_path}")
74
+ return "No file found"
75
+
76
+ try:
77
+ # ✅ Open and read the file content
78
+ with open(full_path, "r", encoding="utf-8") as file:
79
+ content = file.read()
80
+ return content # ✅ Return the raw HTML content
81
+
82
+ except Exception as e:
83
+ print(f"❌ Error reading file {full_path}: {str(e)}")
84
+ return "Error reading file"
85
 
86
 
87
  # **User question prompt**
data/{Tennis elbow graded exercise .html → Tennis elbow graded exercise.html} RENAMED
File without changes
pyproject.toml CHANGED
@@ -11,6 +11,7 @@ dependencies = [
11
  "langchain-openai>=0.3.7",
12
  "langchain-qdrant>=0.2.0",
13
  "langgraph>=0.2.74",
 
14
  "qdrant-client>=1.13.2",
15
  "unstructured>=0.14.8",
16
  "websockets>=15.0",
 
11
  "langchain-openai>=0.3.7",
12
  "langchain-qdrant>=0.2.0",
13
  "langgraph>=0.2.74",
14
+ "nltk>=3.9.1",
15
  "qdrant-client>=1.13.2",
16
  "unstructured>=0.14.8",
17
  "websockets>=15.0",
uv.lock CHANGED
@@ -1159,6 +1159,7 @@ dependencies = [
1159
  { name = "langchain-openai" },
1160
  { name = "langchain-qdrant" },
1161
  { name = "langgraph" },
 
1162
  { name = "qdrant-client" },
1163
  { name = "unstructured" },
1164
  { name = "websockets" },
@@ -1172,6 +1173,7 @@ requires-dist = [
1172
  { name = "langchain-openai", specifier = ">=0.3.7" },
1173
  { name = "langchain-qdrant", specifier = ">=0.2.0" },
1174
  { name = "langgraph", specifier = ">=0.2.74" },
 
1175
  { name = "qdrant-client", specifier = ">=1.13.2" },
1176
  { name = "unstructured", specifier = ">=0.14.8" },
1177
  { name = "websockets", specifier = ">=15.0" },
 
1159
  { name = "langchain-openai" },
1160
  { name = "langchain-qdrant" },
1161
  { name = "langgraph" },
1162
+ { name = "nltk" },
1163
  { name = "qdrant-client" },
1164
  { name = "unstructured" },
1165
  { name = "websockets" },
 
1173
  { name = "langchain-openai", specifier = ">=0.3.7" },
1174
  { name = "langchain-qdrant", specifier = ">=0.2.0" },
1175
  { name = "langgraph", specifier = ">=0.2.74" },
1176
+ { name = "nltk", specifier = ">=3.9.1" },
1177
  { name = "qdrant-client", specifier = ">=1.13.2" },
1178
  { name = "unstructured", specifier = ">=0.14.8" },
1179
  { name = "websockets", specifier = ">=15.0" },