sccastillo committed on
Commit
750c247
·
1 Parent(s): f846da5
Files changed (3) hide show
  1. README.md +31 -4
  2. app.py +632 -212
  3. research_team.py +148 -104
README.md CHANGED
@@ -15,13 +15,40 @@ Scientific research FastAPI application deployed on Hugging Face Spaces.
15
 
16
  ## Features
17
 
18
- - FastAPI web application
19
- - Docker-based deployment
20
- - Simple API endpoints
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  ## API Endpoints
23
 
24
- - `GET /` - Returns a greeting message
 
 
 
 
25
 
26
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
27
 
 
15
 
16
  ## Features
17
 
18
+ - FastAPI web application with AI integration
19
+ - Research Team for Claims Anchoring and Reference Formatting
20
+ - Modern web interface with sidebar navigation
21
+ - Docker-based deployment for Hugging Face Spaces
22
+ - Comprehensive API endpoints
23
+
24
+ ## Web Interface
25
+
26
+ The application features a modern, responsive web interface with:
27
+
28
+ - **Sidebar Navigation**: Switch between AI Question Generator and Research Team Document Processor
29
+ - **Health Monitoring**: Real-time API health checks and status monitoring
30
+ - **Interactive Results**: Formatted display of research results with metrics and raw data
31
+ - **Responsive Design**: Works on desktop and mobile devices
32
+
33
+ ## Quick Start
34
+
35
+ ```bash
36
+ # Install dependencies
37
+ pip install -r requirements.txt
38
+
39
+ # Start the application
40
+ uvicorn app:app --host 0.0.0.0 --port 8000 --reload
41
+ ```
42
+
43
+ The web interface will be available at: http://localhost:8000
44
 
45
  ## API Endpoints
46
 
47
+ - `GET /` - Returns HTML interface
48
+ - `GET /api/hello` - Returns a JSON greeting message
49
+ - `GET /api/health` - Health check endpoint
50
+ - `POST /api/generate` - AI question answering
51
+ - `POST /api/research/process` - Document processing with Research Team
52
 
53
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
54
 
app.py CHANGED
@@ -1,14 +1,12 @@
1
  import os
2
- from fastapi import FastAPI, HTTPException, UploadFile, File
3
  from fastapi.responses import HTMLResponse
4
  from pydantic import BaseModel
5
  from dotenv import load_dotenv
6
  import asyncio
7
 
8
- # Importar dependencias de LangChain y OpenAI
9
- from langchain_openai import OpenAI, ChatOpenAI
10
- from langchain.chains import LLMChain
11
- from langchain.prompts import PromptTemplate
12
 
13
  # Import ResearchTeam
14
  from research_team import create_research_team
@@ -48,6 +46,259 @@ def get_research_team():
48
  research_team = create_research_team()
49
  return research_team
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  def answer_question(question: str):
52
  """
53
  Función para responder preguntas usando OpenAI LLM
@@ -56,239 +307,371 @@ def answer_question(question: str):
56
  raise HTTPException(status_code=400, detail="Please provide a question.")
57
 
58
  # Obtener API key de OpenAI desde variables de entorno
59
- openai_api_key = os.getenv("OPENAI_API_KEY")
60
- if not openai_api_key or openai_api_key == "your_openai_api_key_here":
61
- raise HTTPException(status_code=500, detail="OpenAI API key not configured")
 
62
 
63
- # Template simple para responder preguntas
64
- prompt_template = PromptTemplate(
65
- template="Answer the following question clearly and concisely: {question}",
66
- input_variables=["question"]
67
- )
68
-
69
- # Inicializar OpenAI LLM
70
  try:
71
- llm = OpenAI(
72
- api_key=openai_api_key,
73
- temperature=0.7
74
- )
75
- #llm = ChatOpenAI(
76
- # model="openai/gpt-4.1",
77
- # temperature=0.7,
78
- # api_key=os.getenv("GEAI_API_KEY"),
79
- # base_url=os.getenv("GEAI_BASE_URL")
80
- #)
81
- #llm = ChatOpenAI(
82
- # model="openai/gpt-4.1",
83
- # temperature=0.7,
84
- # api_key=os.getenv("GEAI_API_KEY"),
85
- # base_url=os.getenv("GEAI_BASE_URL")
86
- #)
87
-
88
- # Crear cadena LLM
89
- llm_chain = LLMChain(
90
- prompt=prompt_template,
91
- llm=llm
92
  )
93
-
94
- # Generar respuesta
95
- response = llm_chain.run(question=question)
96
  return GenerateResponse(text=response.strip())
97
 
98
  except Exception as e:
99
  raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")
100
 
101
- @app.get("/", response_class=HTMLResponse)
102
- def read_root():
103
- """
104
- Endpoint principal que devuelve una página HTML simple
105
- """
106
- html_content = """
107
  <!DOCTYPE html>
108
  <html>
109
  <head>
110
  <title>SciResearch API</title>
 
111
  <style>
112
- body { font-family: Arial, sans-serif; margin: 40px; }
113
- h1 { color: #333; }
114
- .container { max-width: 800px; margin: 0 auto; }
115
- .form-group { margin: 20px 0; }
116
- .section { border: 1px solid #ddd; padding: 20px; margin: 20px 0; border-radius: 5px; }
117
- input[type="text"] { width: 100%; padding: 10px; margin: 5px 0; }
118
- textarea { width: 100%; padding: 10px; margin: 5px 0; height: 150px; }
119
- button { background-color: #4CAF50; color: white; padding: 10px 20px; border: none; cursor: pointer; margin: 5px; }
120
- button:hover { background-color: #45a049; }
121
- .research-button { background-color: #2196F3; }
122
- .research-button:hover { background-color: #1976D2; }
123
- #response, #research-response { background-color: #f9f9f9; padding: 15px; margin-top: 20px; border-left: 4px solid #4CAF50; }
124
- #research-response { border-left-color: #2196F3; }
125
- .result-section { margin: 10px 0; padding: 10px; background-color: #f5f5f5; }
126
- .loading { color: #666; font-style: italic; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  </style>
128
  </head>
129
  <body>
130
- <div class="container">
131
- <h1>🦀 SciResearch API with Research Team</h1>
132
- <p>¡Bienvenido a la aplicación de investigación científica con IA y equipo de research para análisis de documentos!</p>
133
-
134
- <div class="section">
135
- <h3>💬 Pregunta a la IA:</h3>
136
- <div class="form-group">
137
- <input type="text" id="question" placeholder="Escribe tu pregunta aquí..." />
138
- <button onclick="askQuestion()">Preguntar</button>
 
139
  </div>
140
 
141
- <div id="response" style="display:none;">
142
- <h4>Respuesta:</h4>
143
- <p id="answer"></p>
 
144
  </div>
145
  </div>
146
-
147
- <div class="section">
148
- <h3>📄 Research Team - Claims Anchoring & Reference Formatting:</h3>
149
- <div class="form-group">
150
- <textarea id="document" placeholder="Pega aquí el contenido del documento para analizar claims y referencias..."></textarea>
151
- <button class="research-button" onclick="processDocument()">Procesar Documento</button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  </div>
153
 
154
- <div id="research-response" style="display:none;">
155
- <h4>Resultados del Research Team:</h4>
156
- <div id="research-results"></div>
 
 
 
 
 
 
 
 
 
 
157
  </div>
158
  </div>
159
-
160
- <h2>Endpoints disponibles:</h2>
161
- <ul>
162
- <li><a href="/docs">/docs</a> - Documentación interactiva de la API</li>
163
- <li><a href="/api/hello">/api/hello</a> - Saludo JSON</li>
164
- <li><a href="/api/health">/api/health</a> - Estado de la aplicación</li>
165
- <li><strong>/api/generate</strong> - Generar respuestas con IA (POST)</li>
166
- <li><strong>/api/research/process</strong> - Procesar documento con Research Team (POST)</li>
167
- </ul>
168
  </div>
169
-
170
- <script>
171
- async function askQuestion() {
172
- const question = document.getElementById('question').value;
173
- if (!question.trim()) {
174
- alert('Por favor escribe una pregunta');
175
- return;
176
- }
177
-
178
- try {
179
- const response = await fetch('/api/generate', {
180
- method: 'POST',
181
- headers: {
182
- 'Content-Type': 'application/json',
183
- },
184
- body: JSON.stringify({question: question})
185
- });
186
-
187
- const data = await response.json();
188
-
189
- if (response.ok) {
190
- document.getElementById('answer').textContent = data.text;
191
- document.getElementById('response').style.display = 'block';
192
- } else {
193
- alert('Error: ' + data.detail);
194
- }
195
- } catch (error) {
196
- alert('Error de conexión: ' + error.message);
197
- }
198
- }
199
-
200
- async function processDocument() {
201
- const document_content = document.getElementById('document').value;
202
- if (!document_content.trim()) {
203
- alert('Por favor pega el contenido del documento');
204
- return;
205
- }
206
-
207
- // Show loading state
208
- const resultsDiv = document.getElementById('research-results');
209
- resultsDiv.innerHTML = '<p class="loading">Procesando documento... Esto puede tomar unos minutos.</p>';
210
- document.getElementById('research-response').style.display = 'block';
211
-
212
- try {
213
- const response = await fetch('/api/research/process', {
214
- method: 'POST',
215
- headers: {
216
- 'Content-Type': 'application/json',
217
- },
218
- body: JSON.stringify({document_content: document_content})
219
- });
220
-
221
- const data = await response.json();
222
-
223
- if (response.ok) {
224
- displayResearchResults(data.result);
225
- } else {
226
- resultsDiv.innerHTML = '<p style="color: red;">Error: ' + data.detail + '</p>';
227
- }
228
- } catch (error) {
229
- resultsDiv.innerHTML = '<p style="color: red;">Error de conexión: ' + error.message + '</p>';
230
- }
231
- }
232
-
233
- function displayResearchResults(result) {
234
- const resultsDiv = document.getElementById('research-results');
235
-
236
- let html = '';
237
-
238
- // Document metadata
239
- if (result.document_metadata) {
240
- html += '<div class="result-section">';
241
- html += '<h4>📋 Metadatos del Documento:</h4>';
242
- html += '<p><strong>Producto:</strong> ' + (result.document_metadata.product || 'No detectado') + '</p>';
243
- html += '<p><strong>Países:</strong> ' + (result.document_metadata.countries?.join(', ') || 'No detectados') + '</p>';
244
- html += '<p><strong>Idioma:</strong> ' + (result.document_metadata.language || 'No detectado') + '</p>';
245
- html += '</div>';
246
- }
247
-
248
- // Claims analysis
249
- if (result.claims_analysis) {
250
- html += '<div class="result-section">';
251
- html += '<h4>🔍 Análisis de Claims:</h4>';
252
- html += '<p><strong>Total de Claims:</strong> ' + result.claims_analysis.total_claims + '</p>';
253
- html += '<p><strong>Claims Principales:</strong> ' + result.claims_analysis.core_claims_count + '</p>';
254
- html += '</div>';
255
- }
256
-
257
- // Claims anchoring
258
- if (result.claims_anchoring) {
259
- html += '<div class="result-section">';
260
- html += '<h4>⚓ Claims Anchoring:</h4>';
261
- if (result.claims_anchoring.summary) {
262
- const summary = result.claims_anchoring.summary;
263
- html += '<p><strong>Claims Procesados:</strong> ' + summary.total_claims_processed + '</p>';
264
- html += '<p><strong>Validados Exitosamente:</strong> ' + summary.successfully_validated + '</p>';
265
- html += '<p><strong>Tasa de Validación:</strong> ' + Math.round(summary.validation_rate * 100) + '%</p>';
266
- }
267
- html += '</div>';
268
- }
269
-
270
- // Reference formatting
271
- if (result.reference_formatting) {
272
- html += '<div class="result-section">';
273
- html += '<h4>📚 Formateo de Referencias:</h4>';
274
- html += '<p><strong>Referencias Formateadas:</strong> ' + result.reference_formatting.total_references + '</p>';
275
- html += '</div>';
276
- }
277
-
278
- resultsDiv.innerHTML = html;
279
- }
280
-
281
- // Permitir envío con Enter
282
- document.getElementById('question').addEventListener('keypress', function(e) {
283
- if (e.key === 'Enter') {
284
- askQuestion();
285
- }
286
- });
287
- </script>
288
  </body>
289
  </html>
290
  """
291
- return html_content
 
 
 
 
 
 
 
 
 
292
 
293
  @app.get("/api/hello")
294
  def greet_json():
@@ -302,13 +685,13 @@ def health_check():
302
  """
303
  Endpoint para verificar el estado de la aplicación
304
  """
305
- openai_configured = bool(os.getenv("OPENAI_API_KEY")) and os.getenv("OPENAI_API_KEY") != "your_openai_api_key_here"
306
 
307
  return {
308
  "status": "healthy",
309
  "service": "sciresearch",
310
  "version": "1.0.0",
311
- "openai_configured": openai_configured,
312
  "research_team_available": True
313
  }
314
 
@@ -319,6 +702,43 @@ def inference(request: QuestionRequest):
319
  """
320
  return answer_question(question=request.question)
321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  @app.post("/api/research/process", summary="Process document with Research Team", tags=["Research Team"], response_model=ResearchResponse)
323
  async def process_document_research(request: DocumentRequest):
324
  """
 
1
  import os
2
+ from fastapi import FastAPI, HTTPException, UploadFile, File, Form
3
  from fastapi.responses import HTMLResponse
4
  from pydantic import BaseModel
5
  from dotenv import load_dotenv
6
  import asyncio
7
 
8
+ # Importar dependencias de OpenAI
9
+ from openai import OpenAI
 
 
10
 
11
  # Import ResearchTeam
12
  from research_team import create_research_team
 
46
  research_team = create_research_team()
47
  return research_team
48
 
49
def get_html_with_response(question: str, answer: str, status: str, error_msg: str = None):
    """Render the full page with the AI generator section showing a response.

    Args:
        question: The question the user submitted; passed through to
            ``get_base_html`` so the form can be re-populated.
        answer: The model's answer text (used only when ``status == "success"``).
        status: ``"success"`` or ``"error"``; any other value renders no
            response box.
        error_msg: Optional error description shown when ``status == "error"``.

    Returns:
        Whatever ``get_base_html`` returns for the "ai-generator" section.
    """
    # Function-scope import: this file does not import ``html`` at module level.
    from html import escape

    response_content = ""
    if status == "success" and answer:
        # Escape first, then turn newlines into <br>, so that model output can
        # never inject markup into the page.
        answer_html = escape(str(answer)).replace("\n", "<br>")
        response_content = f'''
        <div class="response-section">
            <h4>🤖 AI Response:</h4>
            <div style="margin-bottom: 15px;">
                {answer_html}
            </div>
        </div>
        '''
    elif status == "error":
        # Error text frequently echoes user input or upstream responses.
        error_html = escape(str(error_msg or "Unknown error occurred"))
        response_content = f'''
        <div class="response-section error">
            <h4>❌ Error:</h4>
            <div style="color: red;">
                {error_html}
            </div>
        </div>
        '''

    # NOTE(review): ``question`` is forwarded unescaped; confirm that
    # get_base_html escapes it before placing it in the form field.
    return get_base_html("ai-generator", question, "", response_content, "")
72
+
73
def get_html_with_research_response(document: str, result: dict, status: str, error_msg: str = None):
    """Render the full page with the Research Team section showing a result.

    Args:
        document: The submitted document text; passed through to
            ``get_base_html`` so the form can be re-populated.
        result: Research result dictionary, formatted by
            ``format_research_results`` when ``status == "success"``.
        status: ``"success"`` or ``"error"``; any other value renders no
            response box.
        error_msg: Optional error description shown when ``status == "error"``.

    Returns:
        Whatever ``get_base_html`` returns for the "research-team" section.
    """
    # Function-scope import: this file does not import ``html`` at module level.
    from html import escape

    response_content = ""
    if status == "success" and result:
        response_content = f'''
        <div class="response-section">
            <h4>📊 Research Team Results:</h4>
            {format_research_results(result)}
        </div>
        '''
    elif status == "error":
        # Error text frequently echoes user input or upstream responses, so it
        # must not be interpolated into the page as raw HTML.
        error_html = escape(str(error_msg or "Unknown error occurred"))
        response_content = f'''
        <div class="response-section error">
            <h4>❌ Error:</h4>
            <div style="color: red;">
                {error_html}
            </div>
        </div>
        '''

    # NOTE(review): ``document`` is forwarded unescaped; confirm that
    # get_base_html escapes it before placing it in the textarea.
    return get_base_html("research-team", "", document, "", response_content)
94
+
95
# Inline styles shared by the result fragments.
_PANEL_STYLE = "background: white; padding: 20px; border-radius: 8px; border: 1px solid #e1e5e9; margin-bottom: 20px;"
_CARD_STYLE = "background: white; padding: 15px; border-radius: 8px; border: 1px solid #e1e5e9; text-align: center;"
_GRID_STYLE = "display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 15px;"
_FALLBACK_COLOR = "#95a5a6"
_MAX_CLAIMS_SHOWN = 10


def _html_escape(value) -> str:
    """Return *value* as HTML-safe text; ``None`` becomes an empty string."""
    # Function-scope import: this file does not import ``html`` at module level.
    from html import escape

    if value is None:
        return ""
    return escape(str(value), quote=True)


def _truncate_text(text, limit: int) -> str:
    """Cut *text* to *limit* characters, appending ``...`` when shortened."""
    text = "" if text is None else str(text)
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _title_label(value) -> str:
    """Return an escaped, title-cased label, using ``Unknown`` when empty."""
    return _html_escape(str(value or "unknown").title())


def _stat_grid(cards, *, bottom_margin: bool = True) -> str:
    """Render ``(value, label)`` pairs as a responsive grid of stat cards."""
    style = _GRID_STYLE + (" margin-bottom: 20px;" if bottom_margin else "")
    cells = "".join(
        f'<div style="{_CARD_STYLE}">\n'
        f'<div style="font-size: 20px; font-weight: bold; color: #667eea;">{_html_escape(value)}</div>\n'
        f'<div style="font-size: 12px; color: #666; margin-top: 5px;">{label}</div>\n'
        f'</div>\n'
        for value, label in cards
    )
    return f'<div style="{style}">\n{cells}</div>\n'


def _render_claims_section(claims_data: dict) -> str:
    """Render the collapsible list of extracted claims (first ten only)."""
    all_claims = claims_data.get("all_claims") or []
    core_claims = claims_data.get("core_claims") or []
    type_colors = {"core": "#e74c3c", "supporting": "#f39c12", "contextual": "#3498db"}

    rows = []
    for position, claim in enumerate(all_claims[:_MAX_CLAIMS_SHOWN], start=1):
        # A missing type is coloured like a contextual claim (as before).
        color = type_colors.get(str(claim.get("type", "contextual")), _FALLBACK_COLOR)
        # Truncate BEFORE escaping so an entity is never cut in half.
        text = _html_escape(_truncate_text(claim.get("text", ""), 200))
        rows.append(
            f'<div style="padding: 10px; margin: 5px 0; border-left: 4px solid {color}; background: #f8f9fa;">\n'
            f'<strong>Claim {_html_escape(claim.get("id", position))}:</strong> {text}<br>\n'
            f'<small style="color: #666;">Type: {_title_label(claim.get("type"))}'
            f' | Score: {_html_escape(claim.get("importance_score", 0))}</small>\n'
            f'</div>\n'
        )
    hidden = len(all_claims) - _MAX_CLAIMS_SHOWN
    if hidden > 0:
        rows.append(
            f'<div style="text-align: center; color: #666; margin-top: 10px;">'
            f'... and {hidden} more claims</div>'
        )

    return (
        f'<div style="{_PANEL_STYLE}">\n'
        f'<h6>🔍 Claims Extraction</h6>\n'
        f'<div style="margin-bottom: 15px;">\n'
        f'<strong>Total Claims Found:</strong> {len(all_claims)} | '
        f'<strong>Core Claims:</strong> {len(core_claims)}\n'
        f'</div>\n'
        f'<details style="margin-bottom: 10px;">\n'
        f'<summary style="cursor: pointer; font-weight: bold;">View All Claims ({len(all_claims)})</summary>\n'
        f'<div style="margin-top: 10px; max-height: 300px; overflow-y: auto;">\n'
        + "".join(rows)
        + '\n</div>\n</details>\n</div>\n'
    )


def _render_anchoring_section(anchoring_data: dict) -> str:
    """Render the collapsible list of claims with their anchoring evidence."""
    claims_with_evidence = anchoring_data.get("claims_with_evidence") or []
    status_colors = {"validated": "#27ae60", "partial": "#f39c12", "unsupported": "#e74c3c"}

    rows = []
    for item in claims_with_evidence:
        color = status_colors.get(str(item.get("validation_status", "unknown")), _FALLBACK_COLOR)
        text = _html_escape(_truncate_text(item.get("claim_text", ""), 300))
        quality = item.get("quality_assessment")
        quality_html = (
            f'<div style="font-size: 12px; color: #666;"><strong>Quality Assessment:</strong> '
            f'{_html_escape(quality)}</div>\n'
            if quality
            else ""
        )
        rows.append(
            f'<div style="padding: 15px; margin: 10px 0; border: 1px solid #e1e5e9; border-radius: 8px;">\n'
            f'<div style="display: flex; align-items: center; margin-bottom: 10px;">\n'
            f'<strong>Claim {_html_escape(item.get("claim_id", ""))}:</strong>\n'
            f'<span style="margin-left: 10px; padding: 4px 8px; background: {color}; color: white; '
            f'border-radius: 4px; font-size: 12px;">\n'
            f'{_title_label(item.get("validation_status"))}\n'
            f'</span>\n'
            f'</div>\n'
            f'<div style="margin-bottom: 10px; color: #333;">\n{text}\n</div>\n'
            f'<div style="margin-bottom: 10px;">\n'
            f'<strong>Supporting Evidence:</strong> {len(item.get("supporting_evidence") or [])} passages found\n'
            f'</div>\n'
            f'<div style="margin-bottom: 10px;">\n'
            f'<strong>References:</strong> {len(item.get("anchored_references") or [])} references anchored\n'
            f'</div>\n'
            f'{quality_html}'
            f'</div>\n'
        )

    return (
        f'<div style="{_PANEL_STYLE}">\n'
        f'<h6>⚓ Claims Anchoring &amp; Evidence</h6>\n'
        f'<details style="margin-bottom: 10px;">\n'
        f'<summary style="cursor: pointer; font-weight: bold;">'
        f'View Anchoring Results ({len(claims_with_evidence)})</summary>\n'
        f'<div style="margin-top: 10px; max-height: 400px; overflow-y: auto;">\n'
        + "".join(rows)
        + '\n</div>\n</details>\n</div>\n'
    )


def _render_references_section(ref_data: dict) -> str:
    """Render the collapsible list of formatted references."""
    reference_details = ref_data.get("reference_details") or []
    status_colors = {"complete": "#27ae60", "incomplete": "#f39c12", "not_found": "#e74c3c"}

    rows = []
    for ref in reference_details:
        color = status_colors.get(str(ref.get("completion_status", "unknown")), _FALLBACK_COLOR)
        rows.append(
            f'<div style="padding: 10px; margin: 5px 0; border-left: 4px solid {color}; background: #f8f9fa;">\n'
            f'<div style="font-weight: bold; margin-bottom: 5px;">'
            f'Reference {_html_escape(ref.get("reference_id", ""))}</div>\n'
            f'<div style="margin-bottom: 5px;">{_html_escape(ref.get("formatted_citation", ""))}</div>\n'
            f'<small style="color: #666;">Type: {_title_label(ref.get("source_type"))}'
            f' | Status: {_title_label(ref.get("completion_status"))}</small>\n'
            f'</div>\n'
        )

    return (
        f'<div style="{_PANEL_STYLE}">\n'
        f'<h6>📚 Formatted References</h6>\n'
        f'<details style="margin-bottom: 10px;">\n'
        f'<summary style="cursor: pointer; font-weight: bold;">'
        f'View Formatted References ({len(reference_details)})</summary>\n'
        f'<div style="margin-top: 10px; max-height: 300px; overflow-y: auto;">\n'
        + "".join(rows)
        + '\n</div>\n</details>\n</div>\n'
    )


def _render_summary_section(summary_stats: dict) -> str:
    """Render the summary-statistics cards (metadata, claims, anchoring, references)."""
    parts = ['<div style="margin-bottom: 20px;">\n<h5>📊 Summary Statistics</h5>\n']

    if "document_metadata" in summary_stats:
        metadata = summary_stats["document_metadata"] or {}
        countries = metadata.get("countries")
        countries_text = ", ".join(str(c) for c in countries) if countries else "Not detected"
        parts.append(_stat_grid([
            (metadata.get("product", "Not detected"), "Product"),
            (metadata.get("language", "Not detected"), "Language"),
            (countries_text, "Countries"),
        ]))

    if "claims_analysis" in summary_stats:
        claims = summary_stats["claims_analysis"] or {}
        parts.append(_stat_grid([
            (claims.get("total_claims", 0), "Total Claims"),
            (claims.get("core_claims_count", 0), "Core Claims"),
        ]))

    anchoring = summary_stats.get("claims_anchoring")
    if anchoring and "summary" in anchoring:
        summary = anchoring["summary"] or {}
        # round(), not int(): 0.29 * 100 is 28.999999999999996 in floating
        # point, which int() would display as 28%.
        validation_rate = round((summary.get("validation_rate") or 0) * 100)
        parts.append(_stat_grid([
            (summary.get("total_claims_processed", 0), "Claims Processed"),
            (summary.get("successfully_validated", 0), "Successfully Validated"),
            (f"{validation_rate}%", "Validation Rate"),
        ]))

    if "reference_formatting" in summary_stats:
        refs = summary_stats["reference_formatting"] or {}
        # The last grid closes the section, so it carries no bottom margin.
        parts.append(_stat_grid(
            [(refs.get("total_references", 0), "References Formatted")],
            bottom_margin=False,
        ))

    parts.append("</div>\n")
    return "".join(parts)


def format_research_results(result: dict) -> str:
    """Format a research result dictionary as an HTML fragment.

    The fragment has two optional parts, emitted in this order:

    * ``detailed_analysis`` - collapsible lists for ``claims_extracted``,
      ``anchoring_results`` and ``formatted_references``;
    * ``summary_statistics`` - stat cards for ``document_metadata``,
      ``claims_analysis``, ``claims_anchoring`` and ``reference_formatting``.

    Every value taken from ``result`` is HTML-escaped before being placed in
    the markup, because claim text, citations and metadata originate from the
    processed document and must not be able to inject markup.

    Args:
        result: Research output; missing or empty sections are skipped.

    Returns:
        The HTML fragment, or an empty string when neither section is present.
    """
    summary_stats = result.get("summary_statistics") or {}
    detailed_analysis = result.get("detailed_analysis") or {}
    parts = []

    # Detailed analysis comes first: it is the priority content.
    if detailed_analysis:
        parts.append('<div style="margin-bottom: 30px;">\n<h5>📋 Detailed Analysis Results</h5>\n')
        if "claims_extracted" in detailed_analysis:
            parts.append(_render_claims_section(detailed_analysis["claims_extracted"] or {}))
        if "anchoring_results" in detailed_analysis:
            parts.append(_render_anchoring_section(detailed_analysis["anchoring_results"] or {}))
        if "formatted_references" in detailed_analysis:
            parts.append(_render_references_section(detailed_analysis["formatted_references"] or {}))
        parts.append("</div>\n")

    # Summary statistics are secondary information.
    if summary_stats:
        parts.append(_render_summary_section(summary_stats))

    return "".join(parts)
295
+
296
def create_openai_client():
    """Build an ``OpenAI`` client from the ``GEAI_*`` environment variables.

    Returns:
        An ``OpenAI`` instance whose ``api_key`` comes from ``GEAI_API_KEY``
        and whose ``base_url`` comes from ``GEAI_API_BASE_URL``. Either value
        is ``None`` when the corresponding variable is not set.
    """
    return OpenAI(
        api_key=os.getenv("GEAI_API_KEY"),
        base_url=os.getenv("GEAI_API_BASE_URL"),
    )
301
+
302
  def answer_question(question: str):
303
  """
304
  Función para responder preguntas usando OpenAI LLM
 
307
  raise HTTPException(status_code=400, detail="Please provide a question.")
308
 
309
  # Obtener API key de OpenAI desde variables de entorno
310
+ geai_api_key = os.getenv("GEAI_API_KEY")
311
+ geai_base_url = os.getenv("GEAI_API_BASE_URL")
312
+ if not geai_api_key:
313
+ raise HTTPException(status_code=500, detail="GEAI API key not configured")
314
 
315
+ # Inicializar OpenAI client
 
 
 
 
 
 
316
  try:
317
+ # Create OpenAI client
318
+ client = create_openai_client()
319
+
320
+ # Make the LLM call
321
+ completion = client.chat.completions.create(
322
+ model="openai/gpt-4o-mini",
323
+ messages=[{"role": "user", "content": f"Answer the following question clearly and concisely: {question}"}],
324
+ temperature=0.7,
325
+ max_tokens=500
 
 
 
 
 
 
 
 
 
 
 
 
326
  )
327
+
328
+ response = completion.choices[0].message.content
 
329
  return GenerateResponse(text=response.strip())
330
 
331
  except Exception as e:
332
  raise HTTPException(status_code=500, detail=f"Error generating response: {str(e)}")
333
 
334
+ def get_base_html(active_section: str = "ai-generator", question_value: str = "", document_value: str = "", ai_response: str = "", research_response: str = ""):
335
+ """Generate base HTML with optional responses"""
336
+ ai_display = "" if active_section == "ai-generator" else "display: none;"
337
+ research_display = "" if active_section == "research-team" else "display: none;"
338
+
339
+ return f"""
340
  <!DOCTYPE html>
341
  <html>
342
  <head>
343
  <title>SciResearch API</title>
344
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
345
  <style>
346
+ * {{ margin: 0; padding: 0; box-sizing: border-box; }}
347
+ body {{
348
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
349
+ background-color: #f5f7fa;
350
+ color: #333;
351
+ }}
352
+
353
+ .app-container {{
354
+ display: flex;
355
+ min-height: 100vh;
356
+ }}
357
+
358
+ /* Sidebar Styles */
359
+ .sidebar {{
360
+ width: 300px;
361
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
362
+ color: white;
363
+ padding: 20px;
364
+ box-shadow: 2px 0 10px rgba(0,0,0,0.1);
365
+ position: fixed;
366
+ height: 100vh;
367
+ overflow-y: auto;
368
+ }}
369
+
370
+ .sidebar h1 {{
371
+ font-size: 24px;
372
+ margin-bottom: 10px;
373
+ display: flex;
374
+ align-items: center;
375
+ gap: 10px;
376
+ }}
377
+
378
+ .sidebar p {{
379
+ margin-bottom: 30px;
380
+ opacity: 0.9;
381
+ font-size: 14px;
382
+ }}
383
+
384
+ .sidebar-section {{
385
+ margin-bottom: 30px;
386
+ }}
387
+
388
+ .sidebar-section h3 {{
389
+ font-size: 16px;
390
+ margin-bottom: 15px;
391
+ border-bottom: 1px solid rgba(255,255,255,0.3);
392
+ padding-bottom: 5px;
393
+ }}
394
+
395
+ .nav-link {{
396
+ display: block;
397
+ color: rgba(255,255,255,0.8);
398
+ text-decoration: none;
399
+ padding: 10px 15px;
400
+ margin: 5px 0;
401
+ border-radius: 8px;
402
+ transition: background 0.3s;
403
+ }}
404
+
405
+ .nav-link:hover, .nav-link.active {{
406
+ background: rgba(255,255,255,0.2);
407
+ color: white;
408
+ }}
409
+
410
+ /* Main Content Styles */
411
+ .main-content {{
412
+ flex: 1;
413
+ margin-left: 300px;
414
+ padding: 40px;
415
+ background: white;
416
+ min-height: 100vh;
417
+ }}
418
+
419
+ .content-header {{
420
+ margin-bottom: 30px;
421
+ }}
422
+
423
+ .content-header h2 {{
424
+ font-size: 28px;
425
+ color: #333;
426
+ margin-bottom: 10px;
427
+ }}
428
+
429
+ .content-header p {{
430
+ color: #666;
431
+ font-size: 16px;
432
+ }}
433
+
434
+ .generator-section {{
435
+ background: white;
436
+ border-radius: 12px;
437
+ padding: 30px;
438
+ box-shadow: 0 2px 20px rgba(0,0,0,0.08);
439
+ margin-bottom: 20px;
440
+ }}
441
+
442
+ .form-group {{
443
+ margin-bottom: 20px;
444
+ }}
445
+
446
+ .form-group label {{
447
+ display: block;
448
+ font-weight: 600;
449
+ margin-bottom: 8px;
450
+ color: #333;
451
+ }}
452
+
453
+ input[type="text"], textarea {{
454
+ width: 100%;
455
+ padding: 12px 16px;
456
+ border: 2px solid #e1e5e9;
457
+ border-radius: 8px;
458
+ font-size: 14px;
459
+ transition: border-color 0.3s, box-shadow 0.3s;
460
+ font-family: inherit;
461
+ }}
462
+
463
+ input[type="text"]:focus, textarea:focus {{
464
+ outline: none;
465
+ border-color: #667eea;
466
+ box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
467
+ }}
468
+
469
+ textarea {{
470
+ height: 200px;
471
+ resize: vertical;
472
+ }}
473
+
474
+ .btn {{
475
+ padding: 12px 24px;
476
+ border: none;
477
+ border-radius: 8px;
478
+ cursor: pointer;
479
+ font-size: 14px;
480
+ font-weight: 600;
481
+ transition: all 0.3s;
482
+ display: inline-flex;
483
+ align-items: center;
484
+ gap: 8px;
485
+ }}
486
+
487
+ .btn-primary {{
488
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
489
+ color: white;
490
+ }}
491
+
492
+ .btn-primary:hover {{
493
+ transform: translateY(-2px);
494
+ box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4);
495
+ }}
496
+
497
+ .btn-secondary {{
498
+ background: linear-gradient(135deg, #36d1dc 0%, #5b86e5 100%);
499
+ color: white;
500
+ }}
501
+
502
+ .btn-secondary:hover {{
503
+ transform: translateY(-2px);
504
+ box-shadow: 0 4px 15px rgba(54, 209, 220, 0.4);
505
+ }}
506
+
507
+ .response-section {{
508
+ margin-top: 25px;
509
+ padding: 20px;
510
+ background: #f8f9fa;
511
+ border-radius: 8px;
512
+ border-left: 4px solid #667eea;
513
+ }}
514
+
515
+ .response-section.error {{
516
+ border-left-color: #dc3545;
517
+ }}
518
+
519
+ .response-section h4 {{
520
+ margin-bottom: 15px;
521
+ color: #333;
522
+ }}
523
+
524
+ .api-info {{
525
+ background: #f8f9fa;
526
+ padding: 20px;
527
+ border-radius: 8px;
528
+ margin-top: 30px;
529
+ }}
530
+
531
+ .api-info h3 {{
532
+ margin-bottom: 15px;
533
+ color: #333;
534
+ }}
535
+
536
+ .api-info ul {{
537
+ list-style: none;
538
+ }}
539
+
540
+ .api-info li {{
541
+ padding: 8px 0;
542
+ border-bottom: 1px solid #e1e5e9;
543
+ }}
544
+
545
+ .api-info li:last-child {{
546
+ border-bottom: none;
547
+ }}
548
+
549
+ .api-info a {{
550
+ color: #667eea;
551
+ text-decoration: none;
552
+ }}
553
+
554
+ .api-info a:hover {{
555
+ text-decoration: underline;
556
+ }}
557
+
558
+ /* Responsive Design */
559
+ @media (max-width: 768px) {{
560
+ .sidebar {{
561
+ width: 100%;
562
+ position: relative;
563
+ height: auto;
564
+ }}
565
+
566
+ .main-content {{
567
+ margin-left: 0;
568
+ padding: 20px;
569
+ }}
570
+
571
+ .app-container {{
572
+ flex-direction: column;
573
+ }}
574
+ }}
575
  </style>
576
  </head>
577
  <body>
578
+ <div class="app-container">
579
+ <!-- Sidebar -->
580
+ <div class="sidebar">
581
+ <h1>🦀 SciResearch</h1>
582
+ <p>Scientific Research FastAPI application with AI integration and Research Team</p>
583
+
584
+ <div class="sidebar-section">
585
+ <h3>📡 Select Generator</h3>
586
+ <a href="/" class="nav-link {'active' if active_section == 'ai-generator' else ''}">💬 AI Question Generator</a>
587
+ <a href="/?mode=research" class="nav-link {'active' if active_section == 'research-team' else ''}">📄 Research Team Processor</a>
588
  </div>
589
 
590
+ <div class="sidebar-section">
591
+ <h3>📋 Quick Links</h3>
592
+ <a href="/docs" class="nav-link">📚 API Documentation</a>
593
+ <a href="/api/health" class="nav-link">🔧 Health Endpoint</a>
594
  </div>
595
  </div>
596
+
597
+ <!-- Main Content -->
598
+ <div class="main-content">
599
+ <!-- AI Generator Section -->
600
+ <div id="ai-generator-section" class="generator-section" style="{ai_display}">
601
+ <div class="content-header">
602
+ <h2>💬 AI Question Generator</h2>
603
+ <p>Ask questions and get AI-powered responses from the research assistant</p>
604
+ </div>
605
+
606
+ <form action="/ask" method="post">
607
+ <div class="form-group">
608
+ <label for="question">Enter your question:</label>
609
+ <input type="text" name="question" id="question"
610
+ value="{question_value}"
611
+ placeholder="What would you like to know about scientific research?"
612
+ required />
613
+ </div>
614
+
615
+ <button type="submit" class="btn btn-primary">
616
+ 🚀 Submit Question
617
+ </button>
618
+ </form>
619
+
620
+ {ai_response}
621
+ </div>
622
+
623
+ <!-- Research Team Section -->
624
+ <div id="research-team-section" class="generator-section" style="{research_display}">
625
+ <div class="content-header">
626
+ <h2>📄 Research Team Document Processor</h2>
627
+ <p>Process documents for claims anchoring and reference formatting using the AI research team</p>
628
+ </div>
629
+
630
+ <form action="/process" method="post">
631
+ <div class="form-group">
632
+ <label for="document">Paste your document content:</label>
633
+ <textarea name="document_content" id="document"
634
+ placeholder="Paste the content of your research document here..."
635
+ required>{document_value}</textarea>
636
+ </div>
637
+
638
+ <button type="submit" class="btn btn-secondary">
639
+ 🔬 Process Document
640
+ </button>
641
+ </form>
642
+
643
+ {research_response}
644
  </div>
645
 
646
+ <!-- API Information -->
647
+ <div class="api-info">
648
+ <h3>🔗 Available API Endpoints</h3>
649
+ <ul>
650
+ <li><strong>GET /</strong> - This HTML interface</li>
651
+ <li><strong>GET /docs</strong> - Interactive API documentation</li>
652
+ <li><strong>GET /api/hello</strong> - JSON greeting message</li>
653
+ <li><strong>GET /api/health</strong> - Application health check</li>
654
+ <li><strong>POST /ask</strong> - AI question answering (form)</li>
655
+ <li><strong>POST /process</strong> - Document processing (form)</li>
656
+ <li><strong>POST /api/generate</strong> - AI question answering (JSON API)</li>
657
+ <li><strong>POST /api/research/process</strong> - Document processing (JSON API)</li>
658
+ </ul>
659
  </div>
660
  </div>
 
 
 
 
 
 
 
 
 
661
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
662
  </body>
663
  </html>
664
  """
665
+
666
@app.get("/", response_class=HTMLResponse)
def read_root(mode: str = None):
    """
    Serve the main HTML interface.

    The optional ``mode`` query parameter selects the active section:
    ``"research"`` shows the research team processor; any other value
    (including no value at all) shows the AI question generator.
    """
    active_section = "research-team" if mode == "research" else "ai-generator"
    return get_base_html(active_section)
675
 
676
  @app.get("/api/hello")
677
  def greet_json():
 
685
  """
686
  Endpoint para verificar el estado de la aplicación
687
  """
688
+ geai_configured = bool(os.getenv("GEAI_API_KEY")) and bool(os.getenv("GEAI_API_BASE_URL"))
689
 
690
  return {
691
  "status": "healthy",
692
  "service": "sciresearch",
693
  "version": "1.0.0",
694
+ "geai_configured": geai_configured,
695
  "research_team_available": True
696
  }
697
 
 
702
  """
703
  return answer_question(question=request.question)
704
 
705
@app.post("/ask", response_class=HTMLResponse)
def ask_question_form(question: str = Form(...)):
    """
    Handle the question form and return the page with the answer embedded.

    Any exception raised while answering is captured and handed to the
    renderer with status ``"error"`` rather than propagated to the client.
    """
    try:
        reply_text = answer_question(question).text
    except Exception as exc:
        # Failure path: empty answer plus the stringified exception.
        outcome = ("", "error", str(exc))
    else:
        outcome = (reply_text, "success", None)

    return get_html_with_response(question, *outcome)
721
+
722
@app.post("/process", response_class=HTMLResponse)
def process_document_form(document_content: str = Form(...)):
    """
    Handle the document-processing form and return the page with the results.

    This handler is synchronous, so the awaitable returned by
    ``team.process_document`` is driven to completion on a private event
    loop. Any exception is captured and handed to the renderer with status
    ``"error"`` and an empty result instead of being propagated.
    """
    import asyncio

    try:
        team = get_research_team()
        loop = asyncio.new_event_loop()
        try:
            asyncio.set_event_loop(loop)
            result = loop.run_until_complete(team.process_document(document_content))
        finally:
            # A fresh loop is created for every request; without closing it
            # each submission would leak the loop and its resources.
            asyncio.set_event_loop(None)
            loop.close()
        status = "success"
        error_msg = None
    except Exception as e:
        result = {}
        status = "error"
        error_msg = str(e)

    return get_html_with_research_response(document_content, result, status, error_msg)
741
+
742
  @app.post("/api/research/process", summary="Process document with Research Team", tags=["Research Team"], response_model=ResearchResponse)
743
  async def process_document_research(request: DocumentRequest):
744
  """
research_team.py CHANGED
@@ -13,12 +13,11 @@ from enum import Enum
13
  import operator
14
  from datetime import datetime
15
 
16
- from langchain_core.messages import HumanMessage, AIMessage
17
- from langchain_openai import ChatOpenAI
18
- from langchain_core.prompts import ChatPromptTemplate
19
  from langgraph.graph import StateGraph, START, END
20
  from langgraph.graph.message import add_messages
21
- from langgraph.prebuilt import ToolNode, create_react_agent
22
  from langchain_core.tools import tool
23
  from pydantic import BaseModel
24
  import re
@@ -96,24 +95,21 @@ class ResearchTeamState(TypedDict):
96
 
97
  # Web Search Tools Implementation
98
  class WebSearchManager:
99
- """Manager for web search operations using OpenAI agent with web_search_preview"""
100
 
101
  def __init__(self):
102
- """Initialize the web search agent"""
103
  try:
104
- self.agent = create_react_agent(
105
- model="openai:gpt-4o-mini",
106
- tools=[{"type": "web_search_preview"}]
107
- )
108
- logger.info("✅ Web search agent initialized successfully")
109
  except Exception as e:
110
- logger.error(f"❌ Failed to initialize web search agent: {e}")
111
- self.agent = None
112
 
113
  def search_web_sync(self, query: str, source_hint: str = "") -> str:
114
  """Execute web search synchronously with robust error handling"""
115
- if not self.agent:
116
- logger.error("Web search agent not available")
117
  return ""
118
 
119
  try:
@@ -122,20 +118,19 @@ class WebSearchManager:
122
 
123
  logger.info(f"🔍 Executing web search: '{enhanced_query[:50]}...'")
124
 
125
- # Use synchronous invoke instead of async
126
- response = self.agent.invoke({
127
- "messages": [enhanced_query]
128
- })
 
 
 
 
 
129
 
130
- # Extract content from response
131
- if hasattr(response, 'messages') and response.messages:
132
- content = response.messages[-1].content
133
- logger.info(f"✅ Search response received: {len(content)} characters")
134
- return content
135
- else:
136
- content = str(response)
137
- logger.info(f"✅ Search response (str): {len(content)} characters")
138
- return content
139
 
140
  except Exception as e:
141
  logger.error(f"❌ Web search error: {e}")
@@ -198,6 +193,12 @@ def get_web_search_manager():
198
  web_search_manager = WebSearchManager()
199
  return web_search_manager
200
 
 
 
 
 
 
 
201
  @tool
202
  def google_scholar_search(query: str, claim_id: str) -> Dict[str, Any]:
203
  """Real Google Scholar search using web search agent"""
@@ -333,7 +334,7 @@ class AnalyzerAgent:
333
 
334
  def __init__(self, llm):
335
  self.llm = llm
336
- self.prompt = ChatPromptTemplate.from_template("""
337
  You are an AI assistant specialized in analyzing content and extracting claims systematically.
338
 
339
  GUIDELINES:
@@ -351,25 +352,22 @@ class AnalyzerAgent:
351
 
352
  RESPONSE FORMAT:
353
  Provide response in JSON format with:
354
- {{
355
  "product": "product_name_lowercase",
356
  "countries": ["country1", "country2"],
357
  "language": "detected_language",
358
  "claims": [
359
- {{
360
  "id": "claim_1",
361
  "text": "exact claim text",
362
  "type": "core|supporting|contextual",
363
  "importance_score": 9,
364
  "position": 1,
365
  "context": "surrounding context"
366
- }}
367
  ]
368
- }}
369
-
370
- Document Content:
371
- {document_content}
372
- """)
373
 
374
  async def analyze(self, document_content: str) -> Dict[str, Any]:
375
  """Analyze document and extract structured claims"""
@@ -377,12 +375,22 @@ class AnalyzerAgent:
377
 
378
  try:
379
  logger.info("Processing document content for claims extraction")
380
- response = await self.llm.ainvoke(
381
- self.prompt.format_messages(document_content=document_content)
 
 
 
 
 
 
 
 
382
  )
383
 
 
 
384
  # Parse JSON response
385
- result = json.loads(response.content)
386
 
387
  # Separate core claims for priority processing
388
  core_claims = [claim for claim in result["claims"] if claim["type"] == "core"]
@@ -466,7 +474,7 @@ class ResearcherAgent:
466
 
467
  def __init__(self, llm):
468
  self.llm = llm
469
- self.prompt = ChatPromptTemplate.from_template("""
470
  You are an AI assistant specialized in claims anchoring and reference validation.
471
 
472
  GUIDELINES:
@@ -480,24 +488,21 @@ class ResearcherAgent:
480
  - Rate the relevance and quality of support
481
 
482
  RESPONSE FORMAT:
483
- {{
484
- "claim_id": "{claim_id}",
485
  "validation_status": "validated|partial|unsupported",
486
  "anchored_references": [
487
- {{
488
  "reference_id": "ref_id",
489
  "supporting_text": "exact text that supports claim",
490
  "relevance_score": 0.92,
491
  "section": "Results"
492
- }}
493
  ],
494
  "supporting_passages": ["passage1", "passage2"],
495
  "quality_assessment": "assessment text"
496
- }}
497
-
498
- Claim: {claim_text}
499
- Search Results: {search_results}
500
- """)
501
 
502
  async def anchor_claim(self, claim: Dict[str, Any], search_results: List[Dict]) -> Dict[str, Any]:
503
  """Perform claims anchoring for a specific claim"""
@@ -514,15 +519,19 @@ class ResearcherAgent:
514
 
515
  logger.debug(f"Retrieved full content for {len(enriched_results)} top references")
516
 
517
- response = await self.llm.ainvoke(
518
- self.prompt.format_messages(
519
- claim_id=claim["id"],
520
- claim_text=claim["text"],
521
- search_results=json.dumps(enriched_results, indent=2)
522
- )
 
 
 
523
  )
524
 
525
- result = json.loads(response.content)
 
526
  result["claim_text"] = claim["text"]
527
 
528
  logger.info(f"Claim {claim_id} anchored: {result.get('validation_status', 'unknown')} status")
@@ -544,7 +553,7 @@ class EditorAgent:
544
 
545
  def __init__(self, llm):
546
  self.llm = llm
547
- self.prompt = ChatPromptTemplate.from_template("""
548
  You are an expert in reference formatting using J&J formatting guidelines.
549
 
550
  GUIDELINES:
@@ -556,29 +565,24 @@ class EditorAgent:
556
  2. Special rules:
557
  - Use first, second, third authors + "et al." when more than 3 authors
558
  - Use italic format ONLY for book titles
559
- - Translate terms based on content language: {language}
560
  3. Complete missing information where possible
561
  4. Maintain original reference order
562
 
563
  RESPONSE FORMAT:
564
- {{
565
  "formatted_references": [
566
- {{
567
  "id": "ref_id",
568
  "original": "original reference text",
569
  "formatted": "properly formatted reference",
570
  "changes_applied": "description of changes",
571
  "source_type": "journal|book|website|etc",
572
  "completion_status": "complete|incomplete|not_found"
573
- }}
574
  ]
575
- }}
576
-
577
- References to format:
578
- {references}
579
-
580
- Content Language: {language}
581
- """)
582
 
583
  async def format_references(self, references: List[Dict], language: str = "english") -> Dict[str, Any]:
584
  """Format references according to J&J guidelines"""
@@ -586,14 +590,20 @@ class EditorAgent:
586
  logger.info(f"Content language: {language}")
587
 
588
  try:
589
- response = await self.llm.ainvoke(
590
- self.prompt.format_messages(
591
- references=json.dumps(references, indent=2),
592
- language=language
593
- )
 
 
 
 
594
  )
595
 
596
- result = json.loads(response.content)
 
 
597
  formatted_count = len(result.get("formatted_references", []))
598
  logger.info(f"Reference formatting complete: {formatted_count} references processed")
599
 
@@ -610,11 +620,7 @@ class ResearchTeamWorkflow:
610
  logger.info("Initializing Research Team Workflow")
611
 
612
  # Initialize LLM
613
- self.llm = ChatOpenAI(
614
- model="gpt-4",
615
- temperature=0.1,
616
- api_key=os.getenv("OPENAI_API_KEY")
617
- )
618
 
619
  # Initialize agents
620
  self.analyzer = AnalyzerAgent(self.llm)
@@ -782,38 +788,76 @@ class ResearchTeamWorkflow:
782
  """Assemble final results"""
783
  logger.info("STEP 6: Final Assembly - Generating comprehensive report")
784
 
 
 
 
785
  final_output = {
786
- "document_metadata": {
787
- "product": state["product"],
788
- "countries": state["countries"],
789
- "language": state["language"]
790
- },
791
- "claims_analysis": {
792
- "total_claims": len(state["all_claims"]),
793
- "core_claims_count": len(state["core_claims"]),
794
- "claims_details": state["all_claims"]
795
- },
796
- "claims_anchoring": {
797
- "results": state["anchoring_results"],
798
- "summary": self._generate_anchoring_summary(state["anchoring_results"])
799
- },
800
- "reference_formatting": {
801
- "formatted_references": state["formatted_references"],
802
- "total_references": len(state["formatted_references"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
803
  },
804
- "processing_status": state.get("processing_status", {})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
805
  }
806
 
807
  state["final_output"] = final_output
808
 
809
  # Log final summary
810
- summary = final_output["claims_anchoring"]["summary"]
811
  logger.info("FINAL RESULTS SUMMARY:")
812
- logger.info(f" Total claims processed: {final_output['claims_analysis']['total_claims']}")
813
- logger.info(f" Core claims: {final_output['claims_analysis']['core_claims_count']}")
814
- logger.info(f" Successfully validated: {summary['successfully_validated']}")
815
- logger.info(f" Validation rate: {summary['validation_rate']:.1%}")
816
- logger.info(f" References formatted: {final_output['reference_formatting']['total_references']}")
817
  logger.info("STEP 6 COMPLETE: Research Team workflow finished successfully!")
818
 
819
  return state
 
13
  import operator
14
  from datetime import datetime
15
 
16
+ # Use OpenAI directly like in app.py
17
+ from openai import OpenAI
18
+
19
  from langgraph.graph import StateGraph, START, END
20
  from langgraph.graph.message import add_messages
 
21
  from langchain_core.tools import tool
22
  from pydantic import BaseModel
23
  import re
 
95
 
96
  # Web Search Tools Implementation
97
  class WebSearchManager:
98
+ """Manager for web search operations using OpenAI with gpt-4o-mini-search-preview"""
99
 
100
  def __init__(self):
101
+ """Initialize the web search client"""
102
  try:
103
+ self.client = create_openai_client()
104
+ logger.info("✅ Web search client initialized successfully")
 
 
 
105
  except Exception as e:
106
+ logger.error(f"❌ Failed to initialize web search client: {e}")
107
+ self.client = None
108
 
109
  def search_web_sync(self, query: str, source_hint: str = "") -> str:
110
  """Execute web search synchronously with robust error handling"""
111
+ if not self.client:
112
+ logger.error("Web search client not available")
113
  return ""
114
 
115
  try:
 
118
 
119
  logger.info(f"🔍 Executing web search: '{enhanced_query[:50]}...'")
120
 
121
+ # Use the OpenAI client with the search-preview model for web search
122
+ completion = self.client.chat.completions.create(
123
+ model="openai/gpt-4o-mini-search-preview",
124
+ messages=[
125
+ {"role": "system", "content": "You are a web search assistant. Provide comprehensive and accurate information based on the search query. Include relevant details, sources, and context."},
126
+ {"role": "user", "content": enhanced_query}
127
+ ],
128
+ max_tokens=2000
129
+ )
130
 
131
+ content = completion.choices[0].message.content
132
+ logger.info(f"✅ Search response received: {len(content)} characters")
133
+ return content
 
 
 
 
 
 
134
 
135
  except Exception as e:
136
  logger.error(f"❌ Web search error: {e}")
 
193
  web_search_manager = WebSearchManager()
194
  return web_search_manager
195
 
196
def create_openai_client():
    """Build an OpenAI client from the GEAI environment configuration.

    The API key is read from ``GEAI_API_KEY`` and the base URL from
    ``GEAI_API_BASE_URL``; both are passed through to ``OpenAI`` unchanged,
    so an unset variable reaches the constructor as ``None``.
    """
    return OpenAI(
        api_key=os.getenv("GEAI_API_KEY"),
        base_url=os.getenv("GEAI_API_BASE_URL"),
    )
201
+
202
  @tool
203
  def google_scholar_search(query: str, claim_id: str) -> Dict[str, Any]:
204
  """Real Google Scholar search using web search agent"""
 
334
 
335
  def __init__(self, llm):
336
  self.llm = llm
337
+ self.system_prompt = """
338
  You are an AI assistant specialized in analyzing content and extracting claims systematically.
339
 
340
  GUIDELINES:
 
352
 
353
  RESPONSE FORMAT:
354
  Provide response in JSON format with:
355
+ {
356
  "product": "product_name_lowercase",
357
  "countries": ["country1", "country2"],
358
  "language": "detected_language",
359
  "claims": [
360
+ {
361
  "id": "claim_1",
362
  "text": "exact claim text",
363
  "type": "core|supporting|contextual",
364
  "importance_score": 9,
365
  "position": 1,
366
  "context": "surrounding context"
367
+ }
368
  ]
369
+ }
370
+ """
 
 
 
371
 
372
  async def analyze(self, document_content: str) -> Dict[str, Any]:
373
  """Analyze document and extract structured claims"""
 
375
 
376
  try:
377
  logger.info("Processing document content for claims extraction")
378
+
379
+ # Use direct OpenAI client like in app.py (synchronous call)
380
+ completion = self.llm.chat.completions.create(
381
+ model="openai/gpt-4o-mini",
382
+ messages=[
383
+ {"role": "system", "content": self.system_prompt},
384
+ {"role": "user", "content": f"Document Content:\n{document_content}"}
385
+ ],
386
+ temperature=0.1,
387
+ max_tokens=2000
388
  )
389
 
390
+ response_content = completion.choices[0].message.content
391
+
392
  # Parse JSON response
393
+ result = json.loads(response_content)
394
 
395
  # Separate core claims for priority processing
396
  core_claims = [claim for claim in result["claims"] if claim["type"] == "core"]
 
474
 
475
  def __init__(self, llm):
476
  self.llm = llm
477
+ self.system_prompt = """
478
  You are an AI assistant specialized in claims anchoring and reference validation.
479
 
480
  GUIDELINES:
 
488
  - Rate the relevance and quality of support
489
 
490
  RESPONSE FORMAT:
491
+ {
492
+ "claim_id": "claim_id_value",
493
  "validation_status": "validated|partial|unsupported",
494
  "anchored_references": [
495
+ {
496
  "reference_id": "ref_id",
497
  "supporting_text": "exact text that supports claim",
498
  "relevance_score": 0.92,
499
  "section": "Results"
500
+ }
501
  ],
502
  "supporting_passages": ["passage1", "passage2"],
503
  "quality_assessment": "assessment text"
504
+ }
505
+ """
 
 
 
506
 
507
  async def anchor_claim(self, claim: Dict[str, Any], search_results: List[Dict]) -> Dict[str, Any]:
508
  """Perform claims anchoring for a specific claim"""
 
519
 
520
  logger.debug(f"Retrieved full content for {len(enriched_results)} top references")
521
 
522
+ # Use direct OpenAI client like in app.py
523
+ completion = self.llm.chat.completions.create(
524
+ model="openai/gpt-4o-mini",
525
+ messages=[
526
+ {"role": "system", "content": self.system_prompt},
527
+ {"role": "user", "content": f"Claim: {claim['text']}\nSearch Results: {json.dumps(enriched_results, indent=2)}"}
528
+ ],
529
+ temperature=0.1,
530
+ max_tokens=1500
531
  )
532
 
533
+ response_content = completion.choices[0].message.content
534
+ result = json.loads(response_content)
535
  result["claim_text"] = claim["text"]
536
 
537
  logger.info(f"Claim {claim_id} anchored: {result.get('validation_status', 'unknown')} status")
 
553
 
554
  def __init__(self, llm):
555
  self.llm = llm
556
+ self.system_prompt = """
557
  You are an expert in reference formatting using J&J formatting guidelines.
558
 
559
  GUIDELINES:
 
565
  2. Special rules:
566
  - Use first, second, third authors + "et al." when more than 3 authors
567
  - Use italic format ONLY for book titles
568
+ - Translate terms based on content language
569
  3. Complete missing information where possible
570
  4. Maintain original reference order
571
 
572
  RESPONSE FORMAT:
573
+ {
574
  "formatted_references": [
575
+ {
576
  "id": "ref_id",
577
  "original": "original reference text",
578
  "formatted": "properly formatted reference",
579
  "changes_applied": "description of changes",
580
  "source_type": "journal|book|website|etc",
581
  "completion_status": "complete|incomplete|not_found"
582
+ }
583
  ]
584
+ }
585
+ """
 
 
 
 
 
586
 
587
  async def format_references(self, references: List[Dict], language: str = "english") -> Dict[str, Any]:
588
  """Format references according to J&J guidelines"""
 
590
  logger.info(f"Content language: {language}")
591
 
592
  try:
593
+ # Use direct OpenAI client like in app.py
594
+ completion = self.llm.chat.completions.create(
595
+ model="openai/gpt-4o-mini",
596
+ messages=[
597
+ {"role": "system", "content": self.system_prompt},
598
+ {"role": "user", "content": f"References to format:\n{json.dumps(references, indent=2)}\n\nContent Language: {language}"}
599
+ ],
600
+ temperature=0.1,
601
+ max_tokens=2000
602
  )
603
 
604
+ response_content = completion.choices[0].message.content
605
+ result = json.loads(response_content)
606
+
607
  formatted_count = len(result.get("formatted_references", []))
608
  logger.info(f"Reference formatting complete: {formatted_count} references processed")
609
 
 
620
  logger.info("Initializing Research Team Workflow")
621
 
622
  # Initialize LLM
623
+ self.llm = create_openai_client()
 
 
 
 
624
 
625
  # Initialize agents
626
  self.analyzer = AnalyzerAgent(self.llm)
 
788
  """Assemble final results"""
789
  logger.info("STEP 6: Final Assembly - Generating comprehensive report")
790
 
791
+ # Generate anchoring summary for statistics
792
+ anchoring_summary = self._generate_anchoring_summary(state["anchoring_results"])
793
+
794
  final_output = {
795
+ # DETAILED CONTENT ANALYSIS (Priority Content)
796
+ "detailed_analysis": {
797
+ "claims_extracted": {
798
+ "all_claims": state["all_claims"],
799
+ "core_claims": state["core_claims"],
800
+ "total_claims_found": len(state["all_claims"]),
801
+ "core_claims_count": len(state["core_claims"])
802
+ },
803
+ "anchoring_results": {
804
+ "detailed_anchoring": state["anchoring_results"],
805
+ "claims_with_evidence": [
806
+ {
807
+ "claim_id": result["claim_id"],
808
+ "claim_text": result["claim_text"],
809
+ "validation_status": result.get("validation_status", "unknown"),
810
+ "supporting_evidence": result.get("supporting_passages", []),
811
+ "anchored_references": result.get("anchored_references", []),
812
+ "quality_assessment": result.get("quality_assessment", "")
813
+ }
814
+ for result in state["anchoring_results"]
815
+ ]
816
+ },
817
+ "formatted_references": {
818
+ "references": state["formatted_references"],
819
+ "reference_details": [
820
+ {
821
+ "reference_id": ref.get("id", ""),
822
+ "formatted_citation": ref.get("formatted", ""),
823
+ "source_type": ref.get("source_type", ""),
824
+ "completion_status": ref.get("completion_status", "")
825
+ }
826
+ for ref in state["formatted_references"]
827
+ ]
828
+ }
829
  },
830
+
831
+ # SUMMARY STATISTICS (Secondary Information)
832
+ "summary_statistics": {
833
+ "document_metadata": {
834
+ "product": state["product"],
835
+ "countries": state["countries"],
836
+ "language": state["language"]
837
+ },
838
+ "claims_analysis": {
839
+ "total_claims": len(state["all_claims"]),
840
+ "core_claims_count": len(state["core_claims"])
841
+ },
842
+ "claims_anchoring": {
843
+ "summary": anchoring_summary
844
+ },
845
+ "reference_formatting": {
846
+ "total_references": len(state["formatted_references"])
847
+ },
848
+ "processing_status": state.get("processing_status", {})
849
+ }
850
  }
851
 
852
  state["final_output"] = final_output
853
 
854
  # Log final summary
 
855
  logger.info("FINAL RESULTS SUMMARY:")
856
+ logger.info(f" Total claims processed: {final_output['summary_statistics']['claims_analysis']['total_claims']}")
857
+ logger.info(f" Core claims: {final_output['summary_statistics']['claims_analysis']['core_claims_count']}")
858
+ logger.info(f" Successfully validated: {anchoring_summary['successfully_validated']}")
859
+ logger.info(f" Validation rate: {anchoring_summary['validation_rate']:.1%}")
860
+ logger.info(f" References formatted: {final_output['summary_statistics']['reference_formatting']['total_references']}")
861
  logger.info("STEP 6 COMPLETE: Research Team workflow finished successfully!")
862
 
863
  return state