kcheng0816 committed on
Commit
ed05693
·
1 Parent(s): 8e6e753

update app.py to avoid repeated quiz questions

Browse files
Files changed (1) hide show
  1. app.py +95 -23
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  import re
3
- import random
4
  import uuid
 
5
  from dotenv import load_dotenv
6
  import chainlit as cl
7
  from langchain.docstore.document import Document
@@ -21,18 +22,22 @@ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, Tool
21
  from langchain_core.tools import tool
22
  from langchain_community.tools.tavily_search import TavilySearchResults
23
  from functools import partial
24
- from typing import Any, Callable, List, Optional, TypedDict, Union
25
  from langchain_core.messages import AnyMessage
26
  from langgraph.graph.message import add_messages
27
  from typing import TypedDict, Annotated
28
  from langgraph.prebuilt import ToolNode
29
  from langgraph.graph import StateGraph, END
30
- import json
 
 
 
31
 
32
  # Load API Keys
33
  load_dotenv()
34
  os.environ["LANGCHAIN_PROJECT"] = f"AIE5- Bible Study Tool - {uuid.uuid4().hex[0:8]}"
35
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
 
36
 
37
  path = "data/"
38
  book = "Genesis"
@@ -94,14 +99,23 @@ points = [
94
  client.upsert(collection_name=collection_name, points=points)
95
 
96
  # Cached embedder
97
- safe_namespace = "AIE5_BibleStudyTool"
98
- store = LocalFileStore("./cache/")
99
- cached_embedder = CacheBackedEmbeddings.from_bytes_store(
100
- huggingface_embeddings, store, namespace=safe_namespace, batch_size=32
101
- )
102
 
103
- # Retrieval functions (unchanged from original)
104
  def parse_verse_reference(ref: str):
 
 
 
 
 
 
 
 
 
105
  match = re.match(r"(\w+(?:\s\w+)?)\s(\d+):([\d,-]+)", ref)
106
  if not match:
107
  return None
@@ -117,6 +131,18 @@ def parse_verse_reference(ref: str):
117
  return book, chapter, verses
118
 
119
  def retrieve_verse_content(verse_range: str, client: QdrantClient):
 
 
 
 
 
 
 
 
 
 
 
 
120
  parsed = parse_verse_reference(verse_range)
121
  if not parsed:
122
  return "Invalid verse range format."
@@ -146,12 +172,29 @@ def retrieve_verse_content(verse_range: str, client: QdrantClient):
146
  return docs
147
 
148
  def retrieve_documents(question: str, collection_name: str, client: QdrantClient):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  reference_match = re.search(r"(\w+)\s?(\d+):\s?([\d,-]+)", question)
150
  if reference_match:
151
  verse_range = reference_match.group(1) + ' ' + reference_match.group(2) + ':' + reference_match.group(3)
152
  return retrieve_verse_content(verse_range, client)
153
  else:
154
- query_vector = cached_embedder.embed_query(question)
155
  search_result = client.query_points(
156
  collection_name=collection_name,
157
  query=query_vector,
@@ -168,7 +211,7 @@ def retrieve_documents(question: str, collection_name: str, client: QdrantClient
168
  ]
169
  return "No relevant documents found."
170
 
171
- # RAG setup (unchanged from original)
172
  RAG_PROMPT = """\
173
  You are a helpful assistant who answers questions based on provided context. You must only use the provided context, and cannot use your own knowledge.
174
 
@@ -180,10 +223,6 @@ You are a helpful assistant who answers questions based on provided context. You
180
  """
181
  rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
182
 
183
- from langchain_openai import ChatOpenAI
184
- from langchain.chat_models import init_chat_model
185
- from langchain_core.rate_limiters import InMemoryRateLimiter
186
-
187
  rate_limiter = InMemoryRateLimiter(
188
  requests_per_second=1,
189
  check_every_n_seconds=0.1,
@@ -191,6 +230,7 @@ rate_limiter = InMemoryRateLimiter(
191
  )
192
 
193
  chat_model = init_chat_model("gpt-4o-mini", rate_limiter=rate_limiter)
 
194
 
195
  def create_retriever_runnable(collection_name: str, client: QdrantClient) -> RunnableLambda:
196
  return RunnableLambda(lambda question: retrieve_documents(question, collection_name, client))
@@ -226,10 +266,21 @@ def _generate_quiz_question(verse_range: str, client: QdrantClient):
226
  docs = retrieve_verse_content(verse_range, client)
227
  if isinstance(docs, str):
228
  return {"error": docs}
 
 
 
 
 
 
 
 
 
 
229
  verse_content = "\n".join(
230
  f"{doc.metadata['book']} {doc.metadata['chapter']}:{doc.metadata['verse']} - {doc.page_content}"
231
- for doc in docs
232
  )
 
233
  quiz_prompt = ChatPromptTemplate.from_template(
234
  "Based on the following Bible verse(s), generate a multiple-choice quiz question with 4 options (A, B, C, D) "
235
  "and indicate the correct answer:\n\n"
@@ -243,7 +294,11 @@ def _generate_quiz_question(verse_range: str, client: QdrantClient):
243
  "Correct Answer: [Letter of correct answer]\n"
244
  "Explanation: [Brief explanation of why the answer is correct]\n"
245
  )
246
- response = (quiz_prompt | chat_model).invoke({"verse_content": verse_content})
 
 
 
 
247
  response_text = response.content.strip()
248
  lines = response_text.split("\n")
249
  question = ""
@@ -261,6 +316,7 @@ def _generate_quiz_question(verse_range: str, client: QdrantClient):
261
  correct_answer = line[len("Correct Answer:"):].strip()
262
  elif line.startswith("Explanation:"):
263
  explanation = line[len("Explanation:"):].strip()
 
264
  return {
265
  "quiz_question": question,
266
  "options": options,
@@ -273,16 +329,17 @@ def _generate_quiz_question(verse_range: str, client: QdrantClient):
273
  generate_quiz_question_tool = partial(_generate_quiz_question, client=client)
274
 
275
  @tool
276
- def generate_quiz_question(verse_range: str):
277
  """Generate a quiz question based on the content of the specified verse range."""
278
  quiz_data = generate_quiz_question_tool(verse_range)
279
  return json.dumps(quiz_data)
280
 
281
- tool_belt = [ai_rag_tool, tavily_tool, generate_quiz_question]
282
 
283
  # LLM for agent reasoning
284
  llm = init_chat_model("gpt-4o", temperature=0, rate_limiter=rate_limiter)
285
  llm_with_tools = llm.bind_tools(tool_belt)
 
286
 
287
  # Define the state
288
  class AgentState(TypedDict):
@@ -299,9 +356,9 @@ system_message = SystemMessage(content="""You are a Bible study assistant. You c
299
 
300
  - Use the 'ai_rag_tool' to answer questions about the Bible.
301
  - Use the 'tavily_tool' to search the internet for additional information.
302
- - Use the 'generate_quiz_question' tool when the user requests to start a quiz on a specific verse range, such as 'start quiz on Genesis 1:1-10'.
303
 
304
- When the user requests a quiz, extract the verse range from their message and pass it to the 'generate_quiz_question' tool.""")
305
 
306
 
307
  from typing import Optional
@@ -310,7 +367,22 @@ from langgraph.graph.message import AnyMessage, add_messages
310
  from typing import Annotated
311
 
312
 
 
313
  def call_mode(state):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  last_message = state["messages"][-1]
315
 
316
  if state.get("in_quiz", False):
@@ -344,7 +416,7 @@ def call_mode(state):
344
  if user_input == "yes":
345
  # Generate a new quiz question
346
  verse_range = state["verse_range"]
347
- quiz_data_str = generate_quiz_question(verse_range)
348
  quiz_data = json.loads(quiz_data_str)
349
  question = quiz_data["quiz_question"]
350
  options = "\n".join([f"{k}: {v}" for k, v in quiz_data["options"].items()])
@@ -407,7 +479,7 @@ def call_mode(state):
407
  prev_message = state["messages"][-2]
408
  if isinstance(prev_message, AIMessage) and prev_message.tool_calls:
409
  tool_call = prev_message.tool_calls[0]
410
- if tool_call["name"] == "generate_quiz_question":
411
  # Start the quiz
412
  quiz_data_str = last_message.content
413
  quiz_data = json.loads(quiz_data_str)
 
1
  import os
2
  import re
3
+ import json
4
  import uuid
5
+ import random
6
  from dotenv import load_dotenv
7
  import chainlit as cl
8
  from langchain.docstore.document import Document
 
22
  from langchain_core.tools import tool
23
  from langchain_community.tools.tavily_search import TavilySearchResults
24
  from functools import partial
25
+ from typing import Optional, TypedDict
26
  from langchain_core.messages import AnyMessage
27
  from langgraph.graph.message import add_messages
28
  from typing import TypedDict, Annotated
29
  from langgraph.prebuilt import ToolNode
30
  from langgraph.graph import StateGraph, END
31
+ from langchain.chat_models import init_chat_model
32
+ from langchain_core.rate_limiters import InMemoryRateLimiter
33
+ from langchain_core.globals import set_llm_cache
34
+ from langchain_core.caches import InMemoryCache
35
 
36
  # Load API Keys
37
  load_dotenv()
38
  os.environ["LANGCHAIN_PROJECT"] = f"AIE5- Bible Study Tool - {uuid.uuid4().hex[0:8]}"
39
  os.environ["LANGCHAIN_TRACING_V2"] = "true"
40
+ print(os.environ["LANGCHAIN_PROJECT"])
41
 
42
  path = "data/"
43
  book = "Genesis"
 
99
  client.upsert(collection_name=collection_name, points=points)
100
 
101
  # Cached embedder
102
+ #safe_namespace = "AIE5_BibleStudyTool"
103
+ #store = LocalFileStore("./cache/")
104
+ #cached_embedder = CacheBackedEmbeddings.from_bytes_store(
105
+ # huggingface_embeddings, store, namespace=safe_namespace, batch_size=32
106
+ #)
107
 
108
+ # Retrieval functions
109
  def parse_verse_reference(ref: str):
110
+ """
111
+ Parse a verse reference string into book, chapter, and a list of verse numbers.
112
+
113
+ Args:
114
+ ref (str): The verse reference, e.g., "Genesis 1:1-10".
115
+
116
+ Returns:
117
+ tuple: (book, chapter, verses) where verses is a list of integers, or None if invalid.
118
+ """
119
  match = re.match(r"(\w+(?:\s\w+)?)\s(\d+):([\d,-]+)", ref)
120
  if not match:
121
  return None
 
131
  return book, chapter, verses
132
 
133
  def retrieve_verse_content(verse_range: str, client: QdrantClient):
134
+ """
135
+ Retrieve Bible verses from Qdrant based on the specified verse range.
136
+
137
+ Parameters:
138
+ - verse_range (str): The verse range in the format "Book Chapter:Verses", e.g., "Genesis 1:1-5".
139
+ - client (QdrantClient): The Qdrant client to query the database.
140
+
141
+ Returns:
142
+ - list[Document]: A list of Document objects containing the verse text and metadata.
143
+ - str: An error message if the verse range is invalid or no verses are found.
144
+ """
145
+ # Parse the verse range into book, chapter, and verses
146
  parsed = parse_verse_reference(verse_range)
147
  if not parsed:
148
  return "Invalid verse range format."
 
172
  return docs
173
 
174
  def retrieve_documents(question: str, collection_name: str, client: QdrantClient):
175
+ """
176
+ Retrieve documents from a Qdrant collection based on the input question.
177
+
178
+ This function first checks if the question contains a specific Bible verse reference
179
+ (e.g., "Genesis 1:1-5"). If a reference is found, it retrieves the exact verses using
180
+ `retrieve_verse_content`. If no reference is found, it performs a semantic search
181
+ using embeddings to find the most relevant documents.
182
+
183
+ Parameters:
184
+ - question (str): The input question or query string.
185
+ - collection_name (str): The name of the Qdrant collection to search in.
186
+ - client (QdrantClient): The Qdrant client object used to interact with the database.
187
+
188
+ Returns:
189
+ - list[Document]: A list of Document objects containing the relevant verse text and metadata.
190
+ - str: An error message if no relevant documents are found or if the verse reference is invalid.
191
+ """
192
  reference_match = re.search(r"(\w+)\s?(\d+):\s?([\d,-]+)", question)
193
  if reference_match:
194
  verse_range = reference_match.group(1) + ' ' + reference_match.group(2) + ':' + reference_match.group(3)
195
  return retrieve_verse_content(verse_range, client)
196
  else:
197
+ query_vector = huggingface_embeddings.embed_query(question)
198
  search_result = client.query_points(
199
  collection_name=collection_name,
200
  query=query_vector,
 
211
  ]
212
  return "No relevant documents found."
213
 
214
+ # RAG setup
215
  RAG_PROMPT = """\
216
  You are a helpful assistant who answers questions based on provided context. You must only use the provided context, and cannot use your own knowledge.
217
 
 
223
  """
224
  rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
225
 
 
 
 
 
226
  rate_limiter = InMemoryRateLimiter(
227
  requests_per_second=1,
228
  check_every_n_seconds=0.1,
 
230
  )
231
 
232
  chat_model = init_chat_model("gpt-4o-mini", rate_limiter=rate_limiter)
233
+ set_llm_cache(InMemoryCache())
234
 
235
def create_retriever_runnable(collection_name: str, client: QdrantClient) -> RunnableLambda:
    """Build a RunnableLambda that retrieves documents for a question.

    Binds the given Qdrant collection name and client so the returned
    runnable only needs the question string as input.
    """
    # partial pins collection_name/client up front; the runnable receives
    # just the question, exactly like the original lambda form.
    return RunnableLambda(
        partial(retrieve_documents, collection_name=collection_name, client=client)
    )
 
266
  docs = retrieve_verse_content(verse_range, client)
267
  if isinstance(docs, str):
268
  return {"error": docs}
269
+
270
+ # Randomly select a subset of verses if the range has more than 3 verses
271
+ num_verses = len(docs)
272
+ if num_verses > 3:
273
+ subset_size = random.randint(1, 3)
274
+ start_idx = random.randint(0, num_verses - subset_size)
275
+ selected_docs = docs[start_idx : start_idx + subset_size]
276
+ else:
277
+ selected_docs = docs
278
+
279
  verse_content = "\n".join(
280
  f"{doc.metadata['book']} {doc.metadata['chapter']}:{doc.metadata['verse']} - {doc.page_content}"
281
+ for doc in selected_docs
282
  )
283
+
284
  quiz_prompt = ChatPromptTemplate.from_template(
285
  "Based on the following Bible verse(s), generate a multiple-choice quiz question with 4 options (A, B, C, D) "
286
  "and indicate the correct answer:\n\n"
 
294
  "Correct Answer: [Letter of correct answer]\n"
295
  "Explanation: [Brief explanation of why the answer is correct]\n"
296
  )
297
+
298
+ # Use a higher temperature for more diverse question generation
299
+ chat_model_with_temp = chat_model.bind(temperature=0.8)
300
+ response = (quiz_prompt | chat_model_with_temp).invoke({"verse_content": verse_content})
301
+
302
  response_text = response.content.strip()
303
  lines = response_text.split("\n")
304
  question = ""
 
316
  correct_answer = line[len("Correct Answer:"):].strip()
317
  elif line.startswith("Explanation:"):
318
  explanation = line[len("Explanation:"):].strip()
319
+
320
  return {
321
  "quiz_question": question,
322
  "options": options,
 
329
  generate_quiz_question_tool = partial(_generate_quiz_question, client=client)
330
 
331
@tool
def quiz_question_generator(verse_range: str):
    """Generate a quiz question based on the content of the specified verse range."""
    # Delegate to the partial bound to the shared Qdrant client, then
    # serialize the quiz dict so the agent receives a JSON string payload.
    return json.dumps(generate_quiz_question_tool(verse_range))
336
 
337
+ tool_belt = [ai_rag_tool, tavily_tool, quiz_question_generator]
338
 
339
  # LLM for agent reasoning
340
  llm = init_chat_model("gpt-4o", temperature=0, rate_limiter=rate_limiter)
341
  llm_with_tools = llm.bind_tools(tool_belt)
342
+ set_llm_cache(InMemoryCache())
343
 
344
  # Define the state
345
  class AgentState(TypedDict):
 
356
 
357
  - Use the 'ai_rag_tool' to answer questions about the Bible.
358
  - Use the 'tavily_tool' to search the internet for additional information.
359
+ - Use the 'quiz_question_generator' tool when the user requests to start a quiz on a specific verse range, such as 'start quiz on Genesis 1:1-10'.
360
 
361
+ When the user requests a quiz, extract the verse range from their message and pass it to the 'quiz_question_generator' tool.""")
362
 
363
 
364
  from typing import Optional
 
367
  from typing import Annotated
368
 
369
 
370
+ #Agent function
371
  def call_mode(state):
372
+ """
373
+ Manage the conversation flow of the Bible Study Tool, focusing on quiz mode and regular interactions.
374
+
375
+ This function determines the next action in the conversation based on the user's input and the current state.
376
+ It handles quiz mode (processing answers, continuing or ending the quiz) and transitions to or from regular
377
+ question-answering mode. It also processes tool calls, such as starting a quiz, and delegates non-quiz queries
378
+ to a language model.
379
+
380
+ Parameters:
381
+ - state (dict): The current state of the conversation, containing messages, quiz status, and other data.
382
+
383
+ Returns:
384
+ - dict: An updated state dictionary with new messages and modified quiz-related fields as needed.
385
+ """
386
  last_message = state["messages"][-1]
387
 
388
  if state.get("in_quiz", False):
 
416
  if user_input == "yes":
417
  # Generate a new quiz question
418
  verse_range = state["verse_range"]
419
+ quiz_data_str = quiz_question_generator(verse_range)
420
  quiz_data = json.loads(quiz_data_str)
421
  question = quiz_data["quiz_question"]
422
  options = "\n".join([f"{k}: {v}" for k, v in quiz_data["options"].items()])
 
479
  prev_message = state["messages"][-2]
480
  if isinstance(prev_message, AIMessage) and prev_message.tool_calls:
481
  tool_call = prev_message.tool_calls[0]
482
+ if tool_call["name"] == "quiz_question_generator":
483
  # Start the quiz
484
  quiz_data_str = last_message.content
485
  quiz_data = json.loads(quiz_data_str)