Spaces:
Running
Running
Upload 8 files
Browse files
chats.db
CHANGED
Binary files a/chats.db and b/chats.db differ
|
|
main.py
CHANGED
@@ -20,6 +20,7 @@ import csv
|
|
20 |
import io
|
21 |
import pptx
|
22 |
from db import get_db, Chat, ChatMessage, User, Document, SessionLocal
|
|
|
23 |
|
24 |
from fastapi.security import OAuth2PasswordBearer
|
25 |
import requests
|
@@ -203,6 +204,53 @@ def extract_text_from_file(file: UploadFile):
|
|
203 |
except Exception as e:
|
204 |
raise HTTPException(status_code=400, detail=f"Error processing file: {str(e)}")
|
205 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
206 |
@app.post("/chats/{chat_id}/upload")
|
207 |
async def upload_document(chat_id: str, file: UploadFile = File(...), user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
|
208 |
user_id = user_data["id"]
|
|
|
20 |
import io
|
21 |
import pptx
|
22 |
from db import get_db, Chat, ChatMessage, User, Document, SessionLocal
|
23 |
+
from pyqs import get_q_paper
|
24 |
|
25 |
from fastapi.security import OAuth2PasswordBearer
|
26 |
import requests
|
|
|
204 |
except Exception as e:
|
205 |
raise HTTPException(status_code=400, detail=f"Error processing file: {str(e)}")
|
206 |
|
207 |
+
|
208 |
+
@app.get("/searchBySubjectCode")
async def search_by_subject_code(subject_code: str, user_data: dict = Depends(decode_token)):
    """Proxy the cl.thapar.edu subject-code autocomplete endpoint.

    Args:
        subject_code: Partial or full course code to search for.
        user_data: Decoded auth token payload (auth enforced by dependency).

    Returns:
        The JSON list of matching codes exactly as the upstream site returns it.

    Raises:
        HTTPException 502 if the upstream request fails or returns non-JSON.
    """
    try:
        # NOTE(security): verify=False disables TLS certificate checking --
        # kept because cl.thapar.edu serves a broken certificate, but this
        # call is open to MITM. The timeout stops a hung upstream from
        # stalling the worker indefinitely.
        resp = requests.get(
            f"https://cl.thapar.edu/search1.php?term={subject_code}",
            verify=False,
            timeout=15,
        )
        resp.raise_for_status()
        return resp.json()
    except requests.RequestException as e:
        # Surface upstream failures as an explicit 502 instead of an
        # unhandled exception (which the framework would report as a 500).
        raise HTTPException(status_code=502, detail=f"Subject-code lookup failed: {e}")
|
212 |
+
|
213 |
+
|
214 |
+
@app.get("/chats/{chat_id}/importQPapers")
async def import_q_papers(chat_id: str, subject_code: str, user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
    """Download every past question paper for *subject_code* and attach each
    one to the caller's chat as a Document row (extracted PDF text as content).

    Args:
        chat_id: Chat the papers are imported into; must belong to the caller.
        subject_code: Course code looked up via pyqs.get_q_paper.
        user_data: Decoded auth token payload.
        db: SQLAlchemy session (dependency-injected).

    Raises:
        HTTPException 404 if the chat is not the caller's or no papers exist.
        HTTPException 500 if a paper cannot be downloaded or its PDF parsed.
    """
    user_id = user_data["id"]

    # Ownership check: the chat must exist AND belong to the caller.
    chat = db.query(Chat).filter(Chat.chat_id == chat_id, Chat.user_id == user_id).first()
    if not chat:
        raise HTTPException(status_code=404, detail="Chat not found")

    q_papers = get_q_paper(subject_code)
    if not q_papers:
        raise HTTPException(status_code=404, detail="No question papers found for the given subject code")

    for paper in q_papers:
        download_link = paper["DownloadLink"]
        # NOTE(security): verify=False kept for cl.thapar.edu's broken cert;
        # the timeout prevents a dead mirror from hanging the request.
        response = requests.get(download_link, verify=False, timeout=30)
        if response.status_code != 200:
            raise HTTPException(status_code=500, detail=f"Failed to download the paper from {download_link}")

        try:
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(response.content))
            # extract_text() may return None for image-only pages; substitute
            # "" so the join never raises TypeError.
            text = "\n".join([(page.extract_text() or "") for page in pdf_reader.pages])
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Failed to process PDF: {str(e)}")

        # Fixed: original emitted a double extension ("...ExamType..pdf").
        title = f"{paper['CourseName']}_{paper['Year']}_{paper['Semester']}_{paper['ExamType']}.pdf"
        doc_id = str(uuid.uuid4())

        document = Document(
            id=doc_id,
            chat_id=chat_id,
            name=title,
            content=text,
            timestamp=datetime.now()
        )
        db.add(document)

    # Single commit after the loop: all papers land atomically.
    db.commit()
    return {"message": "Question papers imported successfully"}
|
252 |
+
|
253 |
+
|
254 |
@app.post("/chats/{chat_id}/upload")
|
255 |
async def upload_document(chat_id: str, file: UploadFile = File(...), user_data: dict = Depends(decode_token), db: SessionLocal = Depends(get_db)):
|
256 |
user_id = user_data["id"]
|
pyqs.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
from bs4 import BeautifulSoup
|
3 |
+
|
4 |
+
def get_q_paper(ccode):
    """Fetch the list of question papers for course code *ccode*.

    Posts the code to cl.thapar.edu's search form and parses the returned
    HTML table into a list of dicts (see extract_question_papers).

    Args:
        ccode: Course code, e.g. "UCS414".

    Returns:
        list[dict]: One dict per paper with CourseCode/CourseName/Year/
        Semester/ExamType/DownloadLink keys.

    Raises:
        requests.HTTPError: if the site answers with an HTTP error status.
    """
    # The site expects a multipart form submit; the (None, value) tuples
    # send plain fields through requests' files= machinery (no real file).
    form_fields = {
        'ccode': (None, ccode),
        'submit': (None, ''),
    }

    # NOTE(security): verify=False disables TLS verification -- kept because
    # the site serves a broken certificate, but it permits MITM. Timeout
    # added so a dead server cannot block the caller forever.
    response = requests.post('https://cl.thapar.edu/view1.php', files=form_fields, verify=False, timeout=30)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    response.raise_for_status()
    return extract_question_papers(response.text)
|
12 |
+
|
13 |
+
|
14 |
+
|
15 |
+
def extract_question_papers(html_content):
    """Parse cl.thapar.edu's results page into a list of paper dicts.

    Args:
        html_content: Raw HTML of the view1.php response.

    Returns:
        list[dict]: Each entry holds CourseCode, CourseName, Year, Semester,
        ExamType and an absolute DownloadLink. Returns [] when the page has
        no results table (empty result or error page).
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    table = soup.find('table', {'border': '1'})  # the results table
    if table is None:
        # No results / error page: the original crashed here with
        # AttributeError ('NoneType' has no find_all); an empty list is
        # the sane contract for "nothing found".
        return []
    rows = table.find_all('tr')[2:]  # skip the two header rows

    question_papers = []
    for row in rows:
        columns = row.find_all('td')
        if len(columns) != 6:
            continue  # skip malformed or filler rows
        anchor = columns[5].find('a')
        if anchor is None or not anchor.has_attr('href'):
            continue  # row without a usable download link
        question_papers.append({
            'CourseCode': columns[0].text.strip(),
            'CourseName': columns[1].text.strip(),
            'Year': columns[2].text.strip(),
            'Semester': columns[3].text.strip(),
            'ExamType': columns[4].text.strip(),
            # Site links are relative; make them absolute for the caller.
            'DownloadLink': "https://cl.thapar.edu/" + anchor['href'].strip(),
        })

    return question_papers
|
41 |
+
|
42 |
+
|
43 |
+
# Manual smoke test: fetch and display papers for a known course code.
if __name__ == "__main__":
    print(get_q_paper("UCS414"))
|
requirements.txt
CHANGED
@@ -18,3 +18,4 @@ python-dotenv
|
|
18 |
sqlalchemy-utils
|
19 |
gunicorn
|
20 |
python-pptx
|
|
|
|
18 |
sqlalchemy-utils
|
19 |
gunicorn
|
20 |
python-pptx
|
21 |
+
bs4
|