edouardlgp committed on
Commit
9049d6c
·
verified ·
1 Parent(s): 6705ed6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -55
app.py CHANGED
@@ -6,8 +6,6 @@ import time
6
  import traceback
7
  from huggingface_hub import snapshot_download
8
  from pleias_rag_interface import RAGWithCitations
9
- from fastapi import FastAPI
10
- from fastapi.staticfiles import StaticFiles
11
 
12
  # Debugging setup
13
  DEBUG = True
@@ -20,6 +18,7 @@ def log_debug(message):
20
  full_message = f"[{timestamp}] {message}"
21
  debug_messages.append(full_message)
22
  print(full_message) # Print to console
 
23
  if len(debug_messages) > 20:
24
  debug_messages.pop(0)
25
  return "\n".join(debug_messages)
@@ -36,38 +35,50 @@ if not os.path.exists(MODEL_CACHE_DIR):
36
  log_debug("Downloading model...")
37
  snapshot_download(repo_id=MODEL_REPO, local_dir=MODEL_CACHE_DIR)
38
 
 
39
  log_debug("Initializing RAG model...")
40
  try:
41
  rag = RAGWithCitations(model_path_or_name=MODEL_CACHE_DIR)
42
-
43
- # Configure tokenizer and model properly
44
  if hasattr(rag, "model"):
 
45
  if hasattr(rag, "tokenizer"):
46
- # Set pad token if not set
47
  if rag.tokenizer.pad_token is None:
48
  rag.tokenizer.pad_token = rag.tokenizer.eos_token
49
- rag.tokenizer.padding_side = "left"
50
-
51
- # Configure generation settings
52
  rag.model.config.pad_token_id = rag.tokenizer.pad_token_id
53
  rag.model.generation_config.pad_token_id = rag.tokenizer.pad_token_id
 
 
54
  rag.model.generation_config.do_sample = True
55
- rag.model.generation_config.top_p = 0.95
56
-
57
- log_debug("✅ Model loaded with configuration:")
58
- log_debug(f"Pad token: {rag.tokenizer.pad_token}")
59
- log_debug(f"Generation config: {rag.model.generation_config}")
60
-
 
 
 
61
  except Exception as e:
62
  log_debug(f"❌ Model initialization failed: {str(e)}")
63
  raise
64
 
65
- def extract_text_from_pdf_url(url):
 
 
66
  """Extract text from PDF with debug logging"""
67
  debug_state = log_debug(f"📄 Fetching PDF: {url[:60]}...")
68
  try:
 
69
  response = requests.get(url, timeout=30)
70
  response.raise_for_status()
 
 
 
71
  doc = fitz.open(stream=response.content, filetype="pdf")
72
  text = ""
73
  for page in doc:
@@ -79,65 +90,64 @@ def extract_text_from_pdf_url(url):
79
  debug_state = log_debug(error_msg)
80
  return f"[Error loading PDF: {str(e)}]", debug_state
81
 
82
- def generate_answer(query, pdf_urls_str):
83
  """Main processing function with debug output"""
84
  try:
85
  debug_state = log_debug(f"🔍 New query: {query}")
86
-
87
  if not query or not pdf_urls_str:
88
  debug_state = log_debug("❌ Missing question or PDF URLs")
89
  return "Please provide both inputs", debug_state
90
-
91
  pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
92
  sources = []
93
  feedback = "### PDF Load Report:\n"
94
-
 
 
95
  for url in pdf_urls:
96
- text, debug_state = extract_text_from_pdf_url(url)
97
  if not text.startswith("[Error"):
98
- # Create source document with metadata
99
- sources.append({
100
- "text": text,
101
- "metadata": {"source": url}
102
- })
103
  feedback += f"- ✅ Loaded: {url[:80]}\n"
104
  else:
105
  feedback += f"- ❌ Failed: {url[:80]}\n"
106
-
107
  if not sources:
108
  debug_state = log_debug("❌ No valid PDFs processed")
109
  return feedback + "\nNo valid PDFs processed", debug_state
110
-
111
  debug_state = log_debug(f"🧠 Generating answer using {len(sources)} sources...")
112
-
 
113
  try:
114
- # Generate without attention_mask parameter
115
  response = rag.generate(query, sources)
116
-
 
 
117
  answer = response.get('raw_response', 'No response generated')
118
  backend = response.get('backend_used', 'unknown')
119
-
120
- debug_state = log_debug(f"💡 Answer generated using {backend}")
121
- full_output = f"{feedback}\n\n### Answer:\n{answer}\n\n_Generated using {backend}_"
 
 
122
  return full_output, debug_state
123
-
124
  except Exception as e:
125
  error_msg = f"❌ Generation error: {str(e)}"
126
  debug_state = log_debug(error_msg)
127
  debug_state = log_debug(traceback.format_exc())
128
  return feedback + f"\n\n❌ Error: {str(e)}", debug_state
129
-
130
  except Exception as e:
131
  error_msg = f"❌ System error: {str(e)}"
132
  debug_state = log_debug(error_msg)
133
  debug_state = log_debug(traceback.format_exc())
134
  return error_msg, debug_state
135
 
136
- # Create the FastAPI app
137
- app = FastAPI()
138
-
139
- # Mount Gradio app
140
- gradio_app = gr.Blocks(title="Pleias RAG QA", css="""
141
  .debug-console {
142
  font-family: monospace;
143
  max-height: 400px;
@@ -146,38 +156,44 @@ gradio_app = gr.Blocks(title="Pleias RAG QA", css="""
146
  padding: 10px;
147
  border-radius: 5px;
148
  }
149
- """)
150
-
151
- with gradio_app:
 
 
 
152
  gr.Markdown("# Retrieval Generation from PDF files with a 350MB Pocket Size Model from Pleias")
153
-
154
  with gr.Row():
155
  with gr.Column():
156
  question = gr.Textbox(label="Your Question", placeholder="What is this document about?")
157
- pdf_urls = gr.Textbox(lines=5, label="PDF URLs (one per line)",
158
  placeholder="https://example.com/doc1.pdf")
159
  submit_btn = gr.Button("Submit", variant="primary")
160
-
161
  with gr.Column():
162
  output = gr.Markdown(label="Model Response")
163
  if DEBUG:
 
164
  debug_console = gr.Textbox(
165
- label="Debug Console",
166
  interactive=False,
167
  lines=15,
168
  elem_classes=["debug-console"]
169
  )
170
-
 
171
  submit_btn.click(
172
  fn=generate_answer,
173
- inputs=[question, pdf_urls],
174
- outputs=[output, debug_console],
175
  )
176
 
177
- # Mount the Gradio app to FastAPI
178
- app.mount("/", gradio_app)
179
-
180
  if __name__ == "__main__":
181
  log_debug("🚀 Launching interface...")
182
- import uvicorn
183
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
 
 
6
  import traceback
7
  from huggingface_hub import snapshot_download
8
  from pleias_rag_interface import RAGWithCitations
 
 
9
 
10
  # Debugging setup
11
  DEBUG = True
 
18
  full_message = f"[{timestamp}] {message}"
19
  debug_messages.append(full_message)
20
  print(full_message) # Print to console
21
+ # Keep only the last 20 messages
22
  if len(debug_messages) > 20:
23
  debug_messages.pop(0)
24
  return "\n".join(debug_messages)
 
35
  log_debug("Downloading model...")
36
  snapshot_download(repo_id=MODEL_REPO, local_dir=MODEL_CACHE_DIR)
37
 
38
+
39
  log_debug("Initializing RAG model...")
40
  try:
41
  rag = RAGWithCitations(model_path_or_name=MODEL_CACHE_DIR)
42
+
43
+ # Fix the warnings by properly configuring generation parameters
44
  if hasattr(rag, "model"):
45
+ # Configure tokenizer
46
  if hasattr(rag, "tokenizer"):
 
47
  if rag.tokenizer.pad_token is None:
48
  rag.tokenizer.pad_token = rag.tokenizer.eos_token
49
+ rag.tokenizer.padding_side = "left" # For batch generation
50
+
51
+ # Configure model generation settings
52
  rag.model.config.pad_token_id = rag.tokenizer.pad_token_id
53
  rag.model.generation_config.pad_token_id = rag.tokenizer.pad_token_id
54
+
55
+ # Fix the do_sample/top_p warning
56
  rag.model.generation_config.do_sample = True
57
+ rag.model.generation_config.top_p = 0.95 # Explicitly set to match warning
58
+
59
+ # Configure attention mask handling
60
+ rag.model.config.use_cache = True
61
+
62
+ log_debug("✅ Model loaded successfully with configuration:")
63
+ log_debug(f" - Pad token: {rag.tokenizer.pad_token} (ID: {rag.tokenizer.pad_token_id})")
64
+ log_debug(f" - Generation config: {rag.model.generation_config}")
65
+
66
  except Exception as e:
67
  log_debug(f"❌ Model initialization failed: {str(e)}")
68
  raise
69
 
70
+
71
+
72
+ def extract_text_from_pdf_url(url, debug_state):
73
  """Extract text from PDF with debug logging"""
74
  debug_state = log_debug(f"📄 Fetching PDF: {url[:60]}...")
75
  try:
76
+ start_time = time.time()
77
  response = requests.get(url, timeout=30)
78
  response.raise_for_status()
79
+ load_time = time.time() - start_time
80
+ debug_state = log_debug(f"⏳ PDF downloaded in {load_time:.2f}s (size: {len(response.content)/1024:.1f}KB)")
81
+
82
  doc = fitz.open(stream=response.content, filetype="pdf")
83
  text = ""
84
  for page in doc:
 
90
  debug_state = log_debug(error_msg)
91
  return f"[Error loading PDF: {str(e)}]", debug_state
92
 
93
+ def generate_answer(query, pdf_urls_str, debug_state=""):
94
  """Main processing function with debug output"""
95
  try:
96
  debug_state = log_debug(f"🔍 New query: {query}")
97
+
98
  if not query or not pdf_urls_str:
99
  debug_state = log_debug("❌ Missing question or PDF URLs")
100
  return "Please provide both inputs", debug_state
101
+
102
  pdf_urls = [url.strip() for url in pdf_urls_str.strip().split("\n") if url.strip()]
103
  sources = []
104
  feedback = "### PDF Load Report:\n"
105
+
106
+ debug_state = log_debug(f"Processing {len(pdf_urls)} PDF URLs...")
107
+
108
  for url in pdf_urls:
109
+ text, debug_state = extract_text_from_pdf_url(url, debug_state)
110
  if not text.startswith("[Error"):
111
+ sources.append({"text": text, "metadata": {"source": url}})
 
 
 
 
112
  feedback += f"- ✅ Loaded: {url[:80]}\n"
113
  else:
114
  feedback += f"- ❌ Failed: {url[:80]}\n"
115
+
116
  if not sources:
117
  debug_state = log_debug("❌ No valid PDFs processed")
118
  return feedback + "\nNo valid PDFs processed", debug_state
119
+
120
  debug_state = log_debug(f"🧠 Generating answer using {len(sources)} sources...")
121
+ start_time = time.time()
122
+
123
  try:
 
124
  response = rag.generate(query, sources)
125
+ gen_time = time.time() - start_time
126
+ debug_state = log_debug(f"⚡ Generation completed in {gen_time:.2f}s")
127
+
128
  answer = response.get('raw_response', 'No response generated')
129
  backend = response.get('backend_used', 'unknown')
130
+
131
+ debug_state = log_debug(f"💡 Answer preview: {answer[:200]}...")
132
+ debug_state = log_debug(f"🛠️ Backend used: {backend}")
133
+
134
+ full_output = f"{feedback}\n\n### Answer:\n{answer}\n\n_Generated in {gen_time:.2f}s using {backend}_"
135
  return full_output, debug_state
136
+
137
  except Exception as e:
138
  error_msg = f"❌ Generation error: {str(e)}"
139
  debug_state = log_debug(error_msg)
140
  debug_state = log_debug(traceback.format_exc())
141
  return feedback + f"\n\n❌ Error: {str(e)}", debug_state
142
+
143
  except Exception as e:
144
  error_msg = f"❌ System error: {str(e)}"
145
  debug_state = log_debug(error_msg)
146
  debug_state = log_debug(traceback.format_exc())
147
  return error_msg, debug_state
148
 
149
+ # Create the Gradio interface
150
+ with gr.Blocks(title="Pleias RAG QA", css="""
 
 
 
151
  .debug-console {
152
  font-family: monospace;
153
  max-height: 400px;
 
156
  padding: 10px;
157
  border-radius: 5px;
158
  }
159
+ .debug-title {
160
+ font-weight: bold;
161
+ margin-bottom: 5px;
162
+ }
163
+ """) as demo:
164
+
165
  gr.Markdown("# Retrieval Generation from PDF files with a 350MB Pocket Size Model from Pleias")
166
+
167
  with gr.Row():
168
  with gr.Column():
169
  question = gr.Textbox(label="Your Question", placeholder="What is this document about?")
170
+ pdf_urls = gr.Textbox(lines=5, label="PDF URLs (one per line)",
171
  placeholder="https://example.com/doc1.pdf")
172
  submit_btn = gr.Button("Submit", variant="primary")
173
+
174
  with gr.Column():
175
  output = gr.Markdown(label="Model Response")
176
  if DEBUG:
177
+ gr.Markdown("### Debug Console", elem_classes=["debug-title"])
178
  debug_console = gr.Textbox(
179
+ label="",
180
  interactive=False,
181
  lines=15,
182
  elem_classes=["debug-console"]
183
  )
184
+
185
+ # Handle submission
186
  submit_btn.click(
187
  fn=generate_answer,
188
+ inputs=[question, pdf_urls] + ([debug_console] if DEBUG else []),
189
+ outputs=[output, debug_console] if DEBUG else [output],
190
  )
191
 
 
 
 
192
  if __name__ == "__main__":
193
  log_debug("🚀 Launching interface...")
194
+ demo.launch(
195
+ server_port=7860,
196
+ server_name="0.0.0.0",
197
+ show_error=True,
198
+ debug=DEBUG
199
+ )