from fastapi import FastAPI, UploadFile, HTTPException
from pydantic import BaseModel
import uvicorn
from fastapi.responses import JSONResponse
from typing import Dict
import hashlib
from pypdf import PdfReader
from dotenv import load_dotenv
import aiofiles
from pathlib import Path
from langchain_community.document_loaders import WebBaseLoader
import google.generativeai as genai
import os
import re
from fastapi.middleware.cors import CORSMiddleware

# Initialize the Gemini LLM with the API key from the environment
# (never hardcode or print the key)
load_dotenv()
google_api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=google_api_key)
model = genai.GenerativeModel("gemini-1.5-flash")

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # permissive CORS; fine for a demo, tighten for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

def generate_content(url):
    # Fetch the page and normalize its text by collapsing all runs of
    # whitespace (including newlines) into single spaces
    loader = WebBaseLoader(str(url))
    data = loader.load()
    raw_text = data[0].page_content.strip()
    return re.sub(r"\s+", " ", raw_text)


# In-memory store mapping chat IDs to processed document text
processed_data: Dict[str, str] = {}


def generate_chat_id(content: str) -> str:
    # Derive a stable chat ID from the content itself; MD5 is used as a
    # fingerprint here, not for security
    return hashlib.md5(content.encode()).hexdigest()


# API Endpoint 1: Process URL
class ProcessURLRequest(BaseModel):
    url: str


@app.post("/process_url")
async def process_url(request: ProcessURLRequest):
    # Scrape the URL and store its cleaned text under a content-derived chat ID
    scraped_content = generate_content(request.url)
    chat_id = generate_chat_id(scraped_content)
    
    processed_data[chat_id] = scraped_content
    
    return JSONResponse(
        content={"chat_id": chat_id, "message": "URL content processed and stored successfully."}
    )
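
# Example invocation (a sketch, not part of the app: the URL is a
# placeholder, and the port assumes the uvicorn settings at the bottom
# of this file):
#
#   curl -X POST http://localhost:7860/process_url \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com"}'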


# API Endpoint 2: Process PDF Document
@app.post("/process_pdf")
async def process_pdf(file: UploadFile):
    if not file.filename.lower().endswith(".pdf"):
        raise HTTPException(status_code=400, detail="Only PDF files are supported.")

    # Save uploaded file temporarily
    temp_file = Path(f"temp_{file.filename}")
    async with aiofiles.open(temp_file, "wb") as out_file:
        content = await file.read()
        await out_file.write(content)

    # Extract the text of every page; extract_text() can return None for
    # pages without a text layer, so fall back to an empty string
    reader = PdfReader(temp_file)
    extracted_text = ""
    for page in reader.pages:
        extracted_text += page.extract_text() or ""

    chat_id = generate_chat_id(extracted_text)

    processed_data[chat_id] = extracted_text

    # Remove the temporary file now that its text is stored
    temp_file.unlink()

    return JSONResponse(
        content={"chat_id": chat_id, "message": "PDF content processed and stored successfully."}
    )
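
# Example invocation (a sketch, not part of the app: "document.pdf" is a
# placeholder path; the endpoint expects a multipart form field named "file"):
#
#   curl -X POST http://localhost:7860/process_pdf \
#        -F "file=@document.pdf"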


# API Endpoint 3: Chat API
class ChatRequest(BaseModel):
    chat_id: str
    question: str


@app.post("/chat")
async def chat(request: ChatRequest):
    chat_id = request.chat_id
    question = request.question

    # Retrieve the stored content
    if chat_id not in processed_data:
        raise HTTPException(status_code=404, detail="Chat ID not found.")

    stored_content = processed_data[chat_id]
    response = model.generate_content(f"""You are a highly accurate and context-driven LLM tasked with generating precise responses based solely on the provided context. Your goal is to synthesize information exclusively from the given context to respond directly and comprehensively to the question.

Inputs:

Question: {question}
Context: {stored_content}
Task:

Carefully analyze the provided context.
Construct an accurate and relevant response to the question.
Ensure that the response strictly adheres to the given context, without introducing external information, assumptions, or unsupported content.
Evaluation Criteria:

Responses must demonstrate strict adherence to the provided context.
Focus on clarity, precision, and relevance.
Avoid any content not explicitly supported by the context.""")

    response_text = response.text

    return JSONResponse(content={"response": response_text})
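
# Example invocation (a sketch, not part of the app: the chat_id value is a
# placeholder for an ID returned by /process_url or /process_pdf):
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"chat_id": "<chat_id from a previous call>", "question": "What is this document about?"}'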


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)