Spaces:

PercivalFletcher
/

Chai-Tea-Latte

Sleeping

App Files Files Community

Chai-Tea-Latte / main.py

PercivalFletcher

Upload 7 files

5abe5ee verified 4 months ago

raw

history blame

5.89 kB

	# file: main.py
	import time
	import os
	import asyncio
	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel, HttpUrl
	from typing import List, Dict, Any
	from dotenv import load_dotenv

	# Assuming 'ingestion_router.py' is in the same directory and contains the function
	from ingestion_router import ingest_and_parse_document
	from chunking_parent import create_parent_child_chunks
	from embedding import EmbeddingClient
	from retrieval_parent import Retriever
	from generation import generate_answer

	load_dotenv()

	app = FastAPI(
	title="Modular RAG API",
	description="A modular API for Retrieval-Augmented Generation with Parent-Child Retrieval.",
	version="2.3.0", # Updated version
	)

	GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
	embedding_client = EmbeddingClient()
	retriever = Retriever(embedding_client=embedding_client)

	# --- Pydantic Models ---
	class RunRequest(BaseModel):
	documents: HttpUrl
	questions: List[str]

	class RunResponse(BaseModel):
	answers: List[str]

	class TestRequest(BaseModel):
	documents: HttpUrl

	# --- NEW: Test Endpoint for Ingestion and Parsing ---
	@app.post("/test/ingestion", response_model=Dict[str, Any], tags=["Testing"])
	async def test_ingestion_endpoint(request: TestRequest):
	"""
	Tests the complete ingestion and parsing pipeline.
	Downloads a document from a URL, processes it using the modular
	parsing strategy (e.g., parallel for PDF, standard for DOCX),
	and returns the extracted Markdown content and time taken.
	"""
	print("--- Running Document Ingestion & Parsing Test ---")
	start_time = time.perf_counter()
	try:
	# Step 1: Call the main ingestion function from your router
	markdown_content = await ingest_and_parse_document(request.documents)

	end_time = time.perf_counter()
	duration = end_time - start_time
	print(f"--- Ingestion and Parsing took {duration:.2f} seconds ---")

	if not markdown_content:
	raise HTTPException(
	status_code=404,
	detail="Document processed, but no content was extracted."
	)

	return {
	"total_time_seconds": duration,
	"character_count": len(markdown_content),
	"extracted_content": markdown_content,
	}
	except Exception as e:
	# Catch potential download errors, parsing errors, or unsupported file types
	raise HTTPException(status_code=500, detail=f"An error occurred during ingestion test: {str(e)}")


	# --- Test Endpoint for Parent-Child Chunking ---
	@app.post("/test/chunk", response_model=Dict[str, Any], tags=["Testing"])
	async def test_chunking_endpoint(request: TestRequest):
	"""
	Tests the parent-child chunking strategy.
	Returns parent chunks, child chunks, and the time taken.
	"""
	print("--- Running Parent-Child Chunking Test ---")
	start_time = time.perf_counter()

	try:
	# Step 1: Parse the document to get raw text
	markdown_content = await ingest_and_parse_document(request.documents)

	# Step 2: Create parent and child chunks
	child_documents, docstore, _ = create_parent_child_chunks(markdown_content)

	end_time = time.perf_counter()
	duration = end_time - start_time
	print(f"--- Parsing and Chunking took {duration:.2f} seconds ---")

	# Convert Document objects to a JSON-serializable list for the response
	child_chunk_results = [
	{"page_content": doc.page_content, "metadata": doc.metadata}
	for doc in child_documents
	]

	# Retrieve parent documents from the in-memory store
	parent_docs = docstore.mget(list(docstore.store.keys()))
	parent_chunk_results = [
	{"page_content": doc.page_content, "metadata": doc.metadata}
	for doc in parent_docs if doc
	]

	return {
	"total_time_seconds": duration,
	"parent_chunk_count": len(parent_chunk_results),
	"child_chunk_count": len(child_chunk_results),
	"parent_chunks": parent_chunk_results,
	"child_chunks": child_chunk_results,
	}
	except Exception as e:
	raise HTTPException(status_code=500, detail=f"An error occurred during chunking test: {str(e)}")


	@app.post("/hackrx/run", response_model=RunResponse)
	async def run_rag_pipeline(request: RunRequest):
	try:
	print("--- Kicking off RAG Pipeline with Parent-Child Strategy ---")

	# --- STAGE 1: DOCUMENT INGESTION ---
	markdown_content = await ingest_and_parse_document(request.documents)

	# --- STAGE 2: PARENT-CHILD CHUNKING ---
	child_documents, docstore, _ = create_parent_child_chunks(markdown_content)

	if not child_documents:
	raise HTTPException(status_code=400, detail="Document could not be processed into chunks.")

	# --- STAGE 3: INDEXING ---
	retriever.index(child_documents, docstore)

	# --- STAGE 4: CONCURRENT RETRIEVAL & GENERATION ---
	print("Starting retrieval for all questions...")
	retrieval_tasks = [
	retriever.retrieve(q, GROQ_API_KEY)
	for q in request.questions
	]
	all_retrieved_chunks = await asyncio.gather(*retrieval_tasks)
	print("Retrieval complete. Starting final answer generation...")

	answer_tasks = [
	generate_answer(q, chunks, GROQ_API_KEY)
	for q, chunks in zip(request.questions, all_retrieved_chunks)
	]
	final_answers = await asyncio.gather(*answer_tasks)

	print("--- RAG Pipeline Completed Successfully ---")
	return RunResponse(answers=final_answers)

	except Exception as e:
	raise HTTPException(
	status_code=500, detail=f"An internal server error occurred: {str(e)}"
	)