import os
import re
import json
import uuid
import random
from functools import partial
from typing import Annotated, Optional, TypedDict

from dotenv import load_dotenv
import chainlit as cl
from bs4 import BeautifulSoup
from langchain.docstore.document import Document
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import init_chat_model
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
from langchain_core.tools import tool
from langchain_core.rate_limiters import InMemoryRateLimiter
from langchain_core.globals import set_llm_cache
from langchain_core.caches import InMemoryCache
from langchain_community.tools.tavily_search import TavilySearchResults
from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    VectorParams,
    Distance,
    PointStruct,
    Filter,
    FieldCondition,
    MatchValue,
    MatchAny,
)
# Load API keys
load_dotenv()
os.environ["LANGCHAIN_PROJECT"] = f"AIE5- Bible Study Tool - {uuid.uuid4().hex[0:8]}"
os.environ["LANGCHAIN_TRACING_V2"] = "true"
print(os.environ["LANGCHAIN_PROJECT"])

path = "data/"
book = "Genesis"
collection_name = "genesis_study"
# Load Genesis documents (unchanged from original)
def load_genesis_documents(path, book_name):
    documents = []
    for file in os.listdir(path):
        if file.endswith(".html"):
            file_path = os.path.join(path, file)
            with open(file_path, "r", encoding="utf-8") as f:
                soup = BeautifulSoup(f, "html.parser")
                p_tags = soup.find_all("p", align="left")
                for p_tag in p_tags:
                    verse_texts = [content.strip() for content in p_tag.contents
                                   if isinstance(content, str) and content.strip()]
                    for verse in verse_texts:
                        match = re.match(r"\[(\d+):(\d+)\]\s*(.*)", verse)
                        if match:
                            chapter = int(match.group(1))
                            verse_num = int(match.group(2))
                            text = match.group(3)
                            doc = Document(
                                page_content=text,
                                metadata={"book": book_name, "chapter": chapter, "verse": verse_num}
                            )
                            documents.append(doc)
    return documents
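# Illustrative input/output: a <p align="left"> text node such as
# "[1:1] In the beginning God created the heaven and the earth." becomes
# Document(page_content="In the beginning God created the heaven and the earth.",
#          metadata={"book": "Genesis", "chapter": 1, "verse": 1}).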
documents = load_genesis_documents(path, book)

# Initialize embeddings
huggingface_embeddings = HuggingFaceEmbeddings(model_name="kcheng0816/finetuned_arctic_genesis")
dimension = len(huggingface_embeddings.embed_query("test"))

# Set up Qdrant client and collection
client = QdrantClient(":memory:")
client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=dimension, distance=Distance.COSINE)
)

# Generate and upload embeddings
embeddings = huggingface_embeddings.embed_documents([doc.page_content for doc in documents])
points = [
    PointStruct(
        id=str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{doc.metadata['chapter']}_{doc.metadata['verse']}")),
        vector=embedding,
        payload={
            "text": doc.page_content,
            "book": doc.metadata["book"],
            "chapter": doc.metadata["chapter"],
            "verse": doc.metadata["verse"]
        }
    )
    for embedding, doc in zip(embeddings, documents)
]
client.upsert(collection_name=collection_name, points=points)
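# Optional sanity check: with unique (chapter, verse) pairs, the point count
# should match the number of parsed documents.
# assert client.count(collection_name=collection_name).count == len(documents)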
# Cached embedder
# safe_namespace = "AIE5_BibleStudyTool"
# store = LocalFileStore("./cache/")
# cached_embedder = CacheBackedEmbeddings.from_bytes_store(
#     huggingface_embeddings, store, namespace=safe_namespace, batch_size=32
# )
# Retrieval functions
def parse_verse_reference(ref: str):
    """
    Parse a verse reference string into book, chapter, and a list of verse numbers.

    Args:
        ref (str): The verse reference, e.g., "Genesis 1:1-10".

    Returns:
        tuple: (book, chapter, verses) where verses is a list of integers, or None if invalid.
    """
    match = re.match(r"(\w+(?:\s\w+)?)\s(\d+):([\d,-]+)", ref)
    if not match:
        return None
    book, chapter, verse_part = match.groups()
    chapter = int(chapter)
    verses = []
    for part in verse_part.split(','):
        if '-' in part:
            start, end = map(int, part.split('-'))
            verses.extend(range(start, end + 1))
        else:
            verses.append(int(part))
    return book, chapter, verses
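# Illustrative examples:
#   parse_verse_reference("Genesis 1:1-3,5") -> ("Genesis", 1, [1, 2, 3, 5])
#   parse_verse_reference("not a reference") -> None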
def retrieve_verse_content(verse_range: str, client: QdrantClient):
    """
    Retrieve Bible verses from Qdrant based on the specified verse range.

    Parameters:
    - verse_range (str): The verse range in the format "Book Chapter:Verses", e.g., "Genesis 1:1-5".
    - client (QdrantClient): The Qdrant client to query the database.

    Returns:
    - list[Document]: A list of Document objects containing the verse text and metadata.
    - str: An error message if the verse range is invalid or no verses are found.
    """
    # Parse the verse range into book, chapter, and verses
    parsed = parse_verse_reference(verse_range)
    if not parsed:
        return "Invalid verse range format."
    book, chapter, verses = parsed
    verse_filter = Filter(
        must=[
            FieldCondition(key="book", match=MatchValue(value=book)),
            FieldCondition(key="chapter", match=MatchValue(value=chapter)),
            FieldCondition(key="verse", match=MatchAny(any=verses))
        ]
    )
    search_result = client.scroll(
        collection_name=collection_name,
        scroll_filter=verse_filter,
        limit=len(verses)
    )
    if not search_result[0]:
        return "No verses found for the specified range."
    sorted_points = sorted(search_result[0], key=lambda p: p.payload["verse"])
    docs = [
        Document(
            page_content=p.payload["text"],
            metadata=p.payload
        )
        for p in sorted_points
    ]
    return docs
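# Illustrative call: retrieve_verse_content("Genesis 1:1-3", client) returns the
# three matching Documents sorted by verse, or an error string if the reference
# is malformed or no points match.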
def retrieve_documents(question: str, collection_name: str, client: QdrantClient):
    """
    Retrieve documents from a Qdrant collection based on the input question.

    This function first checks if the question contains a specific Bible verse reference
    (e.g., "Genesis 1:1-5"). If a reference is found, it retrieves the exact verses using
    `retrieve_verse_content`. If no reference is found, it performs a semantic search
    using embeddings to find the most relevant documents.

    Parameters:
    - question (str): The input question or query string.
    - collection_name (str): The name of the Qdrant collection to search in.
    - client (QdrantClient): The Qdrant client object used to interact with the database.

    Returns:
    - list[Document]: A list of Document objects containing the relevant verse text and metadata.
    - str: An error message if no relevant documents are found or if the verse reference is invalid.
    """
    reference_match = re.search(r"(\w+)\s?(\d+):\s?([\d,-]+)", question)
    if reference_match:
        verse_range = reference_match.group(1) + ' ' + reference_match.group(2) + ':' + reference_match.group(3)
        return retrieve_verse_content(verse_range, client)
    else:
        query_vector = huggingface_embeddings.embed_query(question)
        search_result = client.query_points(
            collection_name=collection_name,
            query=query_vector,
            limit=5,
            with_payload=True
        ).points
        if search_result:
            return [
                Document(
                    page_content=point.payload["text"],
                    metadata=point.payload
                )
                for point in search_result
            ]
        return "No relevant documents found."
# RAG setup
RAG_PROMPT = """\
You are a helpful assistant who answers questions based on provided context. You must only use the provided context, and cannot use your own knowledge.
### Question
{question}
### Context
{context}
"""
rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

rate_limiter = InMemoryRateLimiter(
    requests_per_second=1,
    check_every_n_seconds=0.1,
    max_bucket_size=10,
)
chat_model = init_chat_model("gpt-4o-mini", rate_limiter=rate_limiter)
set_llm_cache(InMemoryCache())
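# Note: with the in-memory LLM cache set, repeated identical prompts in this
# process are served from the cache rather than making a second API call;
# nothing persists across restarts.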
def create_retriever_runnable(collection_name: str, client: QdrantClient) -> RunnableLambda:
    return RunnableLambda(lambda question: retrieve_documents(question, collection_name, client))

retrieval_runnable = create_retriever_runnable(collection_name, client)

def format_docs(docs):
    if isinstance(docs, str):
        return docs
    return "\n\n".join(f"Genesis {doc.metadata['chapter']}:{doc.metadata['verse']} - {doc.page_content}" for doc in docs)

rag_chain = (
    {"context": retrieval_runnable | RunnableLambda(format_docs), "question": RunnablePassthrough()}
    | RunnablePassthrough.assign(response=rag_prompt | chat_model | StrOutputParser())
)
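# Illustrative output shape: rag_chain.invoke("Who built the ark?") returns a dict
# with "context" (formatted verses or an error string), "question", and "response".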
# Tools
def format_contexts(docs):
    return "\n\n".join(docs) if isinstance(docs, list) else docs

@tool
def ai_rag_tool(question: str):
    """Useful for when you need to answer questions about the Bible."""
    response = rag_chain.invoke(question)
    return {
        "message": [HumanMessage(content=response["response"])],
        "context": format_contexts(response["context"])
    }

tavily_tool = TavilySearchResults(max_results=5)
def _generate_quiz_question(verse_range: str, client: QdrantClient):
    docs = retrieve_verse_content(verse_range, client)
    if isinstance(docs, str):
        return {"error": docs}
    # Randomly select a subset of verses if the range has more than 3 verses
    num_verses = len(docs)
    if num_verses > 3:
        subset_size = random.randint(1, 3)
        start_idx = random.randint(0, num_verses - subset_size)
        selected_docs = docs[start_idx : start_idx + subset_size]
    else:
        selected_docs = docs
    verse_content = "\n".join(
        f"{doc.metadata['book']} {doc.metadata['chapter']}:{doc.metadata['verse']} - {doc.page_content}"
        for doc in selected_docs
    )
    quiz_prompt = ChatPromptTemplate.from_template(
        "Based on the following Bible verse(s), generate a multiple-choice quiz question with 4 options (A, B, C, D) "
        "and indicate the correct answer:\n\n"
        "{verse_content}\n\n"
        "Format your response as follows:\n"
        "Question: [Your question here]\n"
        "A: [Option A]\n"
        "B: [Option B]\n"
        "C: [Option C]\n"
        "D: [Option D]\n"
        "Correct Answer: [Letter of correct answer]\n"
        "Explanation: [Brief explanation of why the answer is correct]\n"
    )
    # Use a higher temperature for more diverse question generation
    chat_model_with_temp = chat_model.bind(temperature=0.8)
    response = (quiz_prompt | chat_model_with_temp).invoke({"verse_content": verse_content})
    response_text = response.content.strip()
    lines = response_text.split("\n")
    question = ""
    options = {}
    correct_answer = ""
    explanation = ""
    for line in lines:
        line = line.strip()
        if line.startswith("Question:"):
            question = line[len("Question:"):].strip()
        elif line.startswith(("A:", "B:", "C:", "D:")):
            key, value = line.split(":", 1)
            options[key.strip()] = value.strip()
        elif line.startswith("Correct Answer:"):
            correct_answer = line[len("Correct Answer:"):].strip()
        elif line.startswith("Explanation:"):
            explanation = line[len("Explanation:"):].strip()
    return {
        "quiz_question": question,
        "options": options,
        "correct_answer": correct_answer,
        "explanation": explanation,
        "verse_range": verse_range,
        "verse_content": verse_content
    }
generate_quiz_question_tool = partial(_generate_quiz_question, client=client)

@tool
def quiz_question_generator(verse_range: str):
    """Generate a quiz question based on the content of the specified verse range."""
    quiz_data = generate_quiz_question_tool(verse_range)
    return json.dumps(quiz_data)
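# Illustrative return value: quiz_question_generator.invoke("Genesis 1:1-5") yields
# a JSON string with keys "quiz_question", "options", "correct_answer",
# "explanation", "verse_range", and "verse_content" (a single "error" key
# instead if the range cannot be resolved).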
tool_belt = [ai_rag_tool, tavily_tool, quiz_question_generator]

# LLM for agent reasoning
llm = init_chat_model("gpt-4o", temperature=0, rate_limiter=rate_limiter)
llm_with_tools = llm.bind_tools(tool_belt)
# Define the state
class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
    in_quiz: bool
    quiz_question: Optional[dict]
    verse_range: Optional[str]
    quiz_score: int
    quiz_total: int
    waiting_for_answer: bool
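# Illustrative quiz-state lifecycle (see call_mode below):
#   in_quiz=False -> 'start quiz on Genesis 1:1-5' triggers the quiz tool ->
#   in_quiz=True, waiting_for_answer=True -> user answers -> feedback,
#   waiting_for_answer=False -> 'yes' asks a new question, 'no' resets the quiz fields.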
# System message
system_message = SystemMessage(content="""You are a Bible study assistant. You can answer questions about the Bible, search the internet for related information, or generate quiz questions based on specific verse ranges.
- Use the 'ai_rag_tool' to answer questions about the Bible.
- Use the 'tavily_tool' to search the internet for additional information.
- Use the 'quiz_question_generator' tool when the user requests to start a quiz on a specific verse range, such as 'start quiz on Genesis 1:1-10'.
When the user requests a quiz, extract the verse range from their message and pass it to the 'quiz_question_generator' tool.""")
# Agent function
def call_mode(state):
    """
    Manage the conversation flow of the Bible Study Tool, focusing on quiz mode and regular interactions.

    This function determines the next action in the conversation based on the user's input and the current state.
    It handles quiz mode (processing answers, continuing or ending the quiz) and transitions to or from regular
    question-answering mode. It also processes tool calls, such as starting a quiz, and delegates non-quiz queries
    to a language model.

    Parameters:
    - state (dict): The current state of the conversation, containing messages, quiz status, and other data.

    Returns:
    - dict: An updated state dictionary with new messages and modified quiz-related fields as needed.
    """
    last_message = state["messages"][-1]
    if state.get("in_quiz", False):
        if state.get("waiting_for_answer", False):
            # Process the user's answer
            quiz_data = state["quiz_question"]
            user_answer = last_message.content.strip().upper()
            correct_answer = quiz_data["correct_answer"]
            new_quiz_total = state["quiz_total"] + 1
            if user_answer == correct_answer:
                new_quiz_score = state["quiz_score"] + 1
                feedback = f"Correct! {quiz_data['explanation']}"
            else:
                new_quiz_score = state["quiz_score"]
                feedback = f"Incorrect. The correct answer is {correct_answer}. {quiz_data['explanation']}"
            return {
                "messages": [
                    AIMessage(content=feedback),
                    AIMessage(content="Would you like another question? Type 'Yes' to continue or 'No' to end the quiz.")
                ],
                "quiz_total": new_quiz_total,
                "quiz_score": new_quiz_score,
                "waiting_for_answer": False,
                "quiz_question": state["quiz_question"],
                "in_quiz": True,
                "verse_range": state["verse_range"]
            }
        else:
            # Handle the user's decision to continue or stop the quiz
            user_input = last_message.content.strip().lower()
            if user_input == "yes":
                # Generate a new quiz question
                verse_range = state["verse_range"]
                quiz_data_str = quiz_question_generator.invoke(verse_range)
                quiz_data = json.loads(quiz_data_str)
                question = quiz_data["quiz_question"]
                options = "\n".join([f"{k}: {v}" for k, v in quiz_data["options"].items()])
                verse_content = quiz_data["verse_content"]
                message_to_user = (
                    f"Based on the following verse(s):\n\n{verse_content}\n\n"
                    f"Here's your quiz question:\n\n{question}\n\n{options}\n\n"
                    "Please select your answer (A, B, C, or D)."
                )
                return {
                    "messages": [AIMessage(content=message_to_user)],
                    "quiz_question": quiz_data,
                    "waiting_for_answer": True,
                    "quiz_total": state["quiz_total"],
                    "quiz_score": state["quiz_score"],
                    "in_quiz": True,
                    "verse_range": state["verse_range"]
                }
            elif user_input == "no":
                # End the quiz and provide a summary
                score = state["quiz_score"]
                total = state["quiz_total"]
                continue_message = "Ask me anything about Genesis or type 'start quiz on <verse range>' (e.g., 'start quiz on Genesis 1:1-5') for a trivia challenge."
                if total > 0:
                    percentage = (score / total) * 100
                    if percentage == 100:
                        feedback = "Excellent! You got all questions correct. Please continue your Bible study!"
                    elif percentage >= 80:
                        feedback = "Great job! You have a strong understanding. Please continue your Bible study!"
                    elif percentage >= 50:
                        feedback = "Good effort! Keep practicing to improve. Please continue your Bible study!"
                    else:
feedback = "Don’t worry, keep your Bible studying and you’ll get better!" | |
summary = f"You got {score} out of {total} questions correct. {feedback} \n\n {continue_message}" | |
else: | |
summary = "No questions were attempted." | |
return { | |
"messages": [AIMessage(content=summary)], | |
"in_quiz": False, | |
"quiz_question": None, | |
"verse_range": None, | |
"quiz_score": 0, | |
"quiz_total": 0, | |
"waiting_for_answer": False | |
} | |
else: | |
# Handle invalid input | |
return { | |
"messages": [AIMessage(content="Please type 'Yes' to continue or 'No' to end the quiz.")], | |
"quiz_total": state["quiz_total"], | |
"quiz_score": state["quiz_score"], | |
"waiting_for_answer": False, | |
"quiz_question": state["quiz_question"], | |
"in_quiz": True, | |
"verse_range": state["verse_range"] | |
} | |
# Handle starting the quiz or other tool calls | |
if len(state["messages"]) >= 2 and isinstance(last_message, ToolMessage): | |
prev_message = state["messages"][-2] | |
if isinstance(prev_message, AIMessage) and prev_message.tool_calls: | |
tool_call = prev_message.tool_calls[0] | |
if tool_call["name"] == "quiz_question_generator": | |
# Start the quiz | |
quiz_data_str = last_message.content | |
quiz_data = json.loads(quiz_data_str) | |
verse_range = quiz_data["verse_range"] | |
question = quiz_data["quiz_question"] | |
options = "\n".join([f"{k}: {v}" for k, v in quiz_data["options"].items()]) | |
verse_content = quiz_data["verse_content"] | |
message_to_user = ( | |
f"Based on the following verse(s):\n\n{verse_content}\n\n" | |
f"Here's your quiz question:\n\n{question}\n\n{options}\n\n" | |
"Please select your answer (A, B, C, or D)." | |
) | |
return { | |
"messages": [AIMessage(content=message_to_user)], | |
"in_quiz": True, | |
"verse_range": verse_range, | |
"quiz_score": 0, | |
"quiz_total": 0, | |
"quiz_question": quiz_data, | |
"waiting_for_answer": True | |
} | |
# Process regular questions or commands | |
messages = [system_message] + state["messages"] | |
response = llm_with_tools.invoke(messages) | |
return {"messages": [response]} | |
tool_node = ToolNode(tool_belt)

def should_continue(state):
    last_message = state["messages"][-1]
    if last_message.tool_calls:
        return "action"
    return END
# Build the graph
uncompiled_graph = StateGraph(AgentState)
uncompiled_graph.add_node("agent", call_mode)
uncompiled_graph.add_node("action", tool_node)
uncompiled_graph.set_entry_point("agent")
uncompiled_graph.add_conditional_edges("agent", should_continue)
uncompiled_graph.add_edge("action", "agent")
compiled_graph = uncompiled_graph.compile()
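# Standalone smoke test (illustrative; uncomment to run outside Chainlit):
# out = compiled_graph.invoke({
#     "messages": [HumanMessage(content="Who is Abraham?")],
#     "in_quiz": False, "quiz_question": None, "verse_range": None,
#     "quiz_score": 0, "quiz_total": 0, "waiting_for_answer": False,
# })
# print(out["messages"][-1].content)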
# Chainlit integration
@cl.on_chat_start
async def start():
    system_message = SystemMessage(content="Welcome to the Bible Study Tool!")
    initial_state = {
        "messages": [system_message],
        "in_quiz": False,
        "quiz_question": None,
        "verse_range": None,
        "quiz_score": 0,
        "quiz_total": 0,
        "waiting_for_answer": False
    }
    cl.user_session.set("state", initial_state)
    await cl.Message(content="Welcome to the Bible Study Tool! Ask me anything about Genesis or type 'start quiz on <verse range>' (e.g., 'start quiz on Genesis 1:1-5') for a trivia challenge.").send()
@cl.on_message
async def main(message: cl.Message):
    state = cl.user_session.get("state")
    current_messages = len(state["messages"])
    state["messages"].append(HumanMessage(content=message.content))
    result = await compiled_graph.ainvoke(state)
    cl.user_session.set("state", result)
    new_messages = result["messages"][current_messages + 1:]
    for msg in new_messages:
        if isinstance(msg, AIMessage):
            await cl.Message(content=msg.content).send()