Spaces:

tatts
/

UNTE_ASSISTANT

Running

App Files Files Community

tatts committed on Sep 3, 2024

Commit

f9a2b82

verified ·

1 Parent(s): 0792b3a

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -310

app.py CHANGED Viewed

@@ -1,313 +1,7 @@
 import os
-import re
-import logging
-import requests
-import pandas as pd
-from bs4 import BeautifulSoup
-from langdetect import detect, DetectorFactory
-from langdetect.lang_detect_exception import LangDetectException
-import langid
-from deep_translator import GoogleTranslator
-import gradio as gr
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain.docstore.document import Document
-from langchain_community.vectorstores.utils import filter_complex_metadata
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_core.pydantic_v1 import BaseModel, Field
-from langchain_openai import ChatOpenAI
-from langchain_core.runnables import RunnablePassthrough, RunnableLambda
-from langchain_core.output_parsers import StrOutputParser
-from operator import itemgetter
-from langchain_community.tools.tavily_search import TavilySearchResults
-from typing import List
-from typing_extensions import TypedDict
-from langgraph.graph import END, StateGraph
-from langchain_openai import OpenAIEmbeddings
-from langchain_community.document_loaders import UnstructuredURLLoader
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
-from langchain.chains import create_history_aware_retriever
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain_core.messages import HumanMessage
-# Setup logging
-logging.basicConfig(level=logging.DEBUG)
-# The OpenAI key is read from the environment instead of being written in this
-# file: the file is public and versioned, so a literal key here is leaked.
-# NOTE(review): any key that was ever committed to this file should be treated
-# as compromised and revoked.
-OPENAI_API_TOKEN = os.getenv("OPENAI_API_KEY")
-if OPENAI_API_TOKEN is None:
-    raise ValueError("OPENAI_API_KEY environment variable not set")
-# Retrieve the secret token from environment variables
-hf_api_token = os.getenv('HF_API_TOKEN')
-# Ensure the token is not None
-if hf_api_token is None:
-    raise ValueError("HF_API_TOKEN environment variable not set")
-# Fix the langdetect seed so repeated detections of the same text agree
-DetectorFactory.seed = 0
-# Function to translate text based on detected language
-def translate_content(text):
-    """Return ``text`` translated between French and English.
-
-    French input is translated to English and English input to French.
-    Text detected as any other language is returned unchanged, and so is
-    the input when detection or translation raises (the error is printed).
-    """
-    # Each supported source language maps to the language it is translated into
-    opposite_language = {'fr': 'en', 'en': 'fr'}
-    try:
-        source_lang = detect(text)
-        target_lang = opposite_language.get(source_lang)
-        if target_lang is None:
-            return text
-        return GoogleTranslator(source=source_lang, target=target_lang).translate(text)
-    except Exception as e:
-        print(f"Error detecting language or translating: {e}")
-        return text
-# Function to chunk content
-def chunk_content(content, chunk_size=1250, overlap=250):
-    """Split ``content`` into consecutive slices that overlap each other.
-
-    Args:
-        content: Sliceable sequence (a string here) to split.
-        chunk_size: Maximum length of each chunk.
-        overlap: Number of trailing items of one chunk repeated at the
-            start of the next one.
-
-    Returns:
-        A list of slices taken every ``chunk_size - overlap`` positions;
-        an empty list when ``content`` is empty.
-
-    Raises:
-        ValueError: If ``overlap`` is not smaller than ``chunk_size``; the
-            window would never advance and the loop would not terminate.
-    """
-    step = chunk_size - overlap
-    if step <= 0:
-        raise ValueError("overlap must be smaller than chunk_size")
-    return [content[start:start + chunk_size] for start in range(0, len(content), step)]
-# Initialize the list to store chunked documents
-chunked_web_doc = []
-# Load the Excel file
-df = pd.read_excel("UNTEanswers.xlsx")
-# Merge the 'prompt' and 'reference' columns
-df['merged_content'] = df['prompt'] + " " + df['reference']
-# Collect the text entries and their chunks in a single pass over the rows,
-# so every row is sent to the translator only once.
-text_entries = []
-for _, row in df.iterrows():
-    merged_content = row['merged_content']
-    translated_content = translate_content(merged_content)
-    # The translation is kept only when it is non-empty and differs from the original
-    variants = [merged_content]
-    if translated_content and translated_content != merged_content:
-        variants.append(translated_content)
-    for variant in variants:
-        text_entries.append(variant)
-        variant_chunks = chunk_content(variant)
-        if not variant_chunks:
-            continue
-        # The language belongs to the whole entry: detect it once, not per chunk
-        variant_language = detect(variant)
-        chunked_web_doc.extend(
-            {
-                "url": "UNTEanswers.xlsx",  # Mark as coming from the Excel file
-                "language": variant_language,
-                "chunk": chunk,
-            }
-            for chunk in variant_chunks
-        )
-# Convert the list of text entries into a single string
-excel_text = "\n".join(text_entries)
-# Load the fetched content from the text file
-with open('fetched_contentt.txt', 'r', encoding='utf-8') as f:
-    fetched_content = f.read()
-# Combine the text from the Excel file and the fetched content
-content = fetched_content + "\n" + excel_text
-# Optionally, save the combined content to a new file
-with open('merged_content.txt', 'w', encoding='utf-8') as f:
-    f.write(content)
-# Blocks are separated by a line of 80 dashes followed by a blank line.
-# NOTE(review): `content` also contains the Excel text appended above, so that
-# text is parsed as part of the last block - confirm this is intended.
-web_contents = content.split("-" * 80 + "\n\n")
-for block in web_contents:
-    if not block.strip():
-        continue
-    url = ""
-    title = ""
-    # Body lines are gathered per language and joined once at the end
-    section_lines = {"en": [], "fr": []}
-    language = None
-    for line in block.strip().splitlines():
-        if line.startswith("URL:"):
-            # Slice the prefix off rather than splitting on it, so a value
-            # that itself contains "URL:" is not cut short
-            url = line[len("URL:"):].strip()
-        elif line.startswith("Title:"):
-            title = line[len("Title:"):].strip()
-        elif line == "English Content:":
-            language = "en"
-        elif line == "French Content:":
-            language = "fr"
-        elif language is not None:
-            # Lines seen before any language header are ignored
-            section_lines[language].append(line)
-    for section_language in ("en", "fr"):
-        section_text = "\n".join(section_lines[section_language]).strip()
-        if not section_text:
-            continue
-        for chunk in chunk_content(section_text):
-            chunked_web_doc.append({
-                "url": url,
-                "language": section_language,
-                "chunk": chunk
-            })
-# Embedding model configuration (device set to CPU)
-model_id = 'sentence-transformers/all-MiniLM-L6-v2'
-model_kwargs = dict(device='cpu')
-embeddings = HuggingFaceEmbeddings(model_name=model_id, model_kwargs=model_kwargs)
-# Wrap every chunk in a Document that carries its source URL and language
-documents = []
-for entry in chunked_web_doc:
-    entry_metadata = {"url": entry['url'], "language": entry['language']}
-    documents.append(Document(page_content=entry['chunk'], metadata=entry_metadata))
-# Build the Chroma collection from the documents, persisted under ./web_db
-chroma_db = Chroma.from_documents(
-    documents=documents,
-    collection_name='rag_web_db',
-    embedding=embeddings,
-    collection_metadata={"hnsw:space": "cosine"},
-    persist_directory="./web_db",
-)
-# Retriever configured with k=3 and a score threshold of 0.3
-retrieval_settings = {"k": 3, "score_threshold": 0.3}
-similarity_threshold_retriever = chroma_db.as_retriever(
-    search_type="similarity_score_threshold",
-    search_kwargs=retrieval_settings,
-)
-llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
-################ history_aware_retriever###################
-from langchain.chains import create_history_aware_retriever
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-# System instruction: rewrite the latest question so it stands on its own,
-# without answering it
-contextualize_q_system_prompt = (
-    "Given a chat history and the latest user question "
-    "which might reference context in the chat history, formulate a standalone question "
-    "which can be understood without the chat history. Do NOT answer the question, "
-    "just reformulate it if needed and otherwise return it as is."
-)
-# Message order: system instruction, prior conversation, then the new input
-contextualize_messages = [
-    ("system", contextualize_q_system_prompt),
-    MessagesPlaceholder("chat_history"),
-    ("human", "{input}"),
-]
-contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)
-history_aware_retriever = create_history_aware_retriever(
-    llm,
-    similarity_threshold_retriever,
-    contextualize_q_prompt,
-)
-################ question_answer_chain#####################
-from langchain.chains import create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
-# System instruction for answering from the retrieved context. The context
-# placeholder is separated from the instructions by a blank line so the
-# retrieved text does not run straight into the last instruction sentence.
-qa_system_prompt = (
-    "You are an assistant for question-answering tasks. "
-    "Use the following pieces of retrieved context to answer the question. "
-    "If you don't know the answer, just say that you don't know. "
-    "Use three sentences maximum and keep the answer concise."
-    "\n\n"
-    "{context}"
-)
-qa_prompt = ChatPromptTemplate.from_messages(
-    [
-        ("system", qa_system_prompt),
-        MessagesPlaceholder("chat_history"),
-        ("human", "{input}"),
-    ]
-)
-question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
-################ rag_chain#####################
-# Full pipeline: history-aware retrieval feeding the answer chain
-rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
-chat_history = []
-def ask(question, chat_history):
-    """Answer ``question`` with the RAG chain and extend the conversation.
-
-    Args:
-        question: The text typed by the user.
-        chat_history: List of ``(role, content)`` tuples; it is extended in
-            place with the new user and assistant turns.
-
-    Returns:
-        A 3-tuple ``(display_chat_history, chat_history, "")``: the rows for
-        the chat widget, the updated history, and an empty string that
-        clears the input box.
-    """
-    # Prepend a phrase to the question to ensure relevance to Moodle
-    modified_question = "using platform Moodle :" + question
-    ai_message = rag_chain.invoke({"input": modified_question, "chat_history": chat_history})
-    answer = ai_message["answer"]
-    # The stored history keeps the raw question and the answer without sources
-    chat_history.append(("user", question))
-    chat_history.append(("assistant", answer))
-    # Source URLs in retrieval order, each listed once: several retrieved
-    # chunks can carry the same URL
-    document_links = list(dict.fromkeys(
-        doc.metadata['url']
-        for doc in ai_message.get('context', [])
-        if 'url' in doc.metadata
-    ))
-    # User turns become (None, content) rows and assistant turns (content, None).
-    # NOTE(review): Gradio's tuple format is (user, bot) - confirm that this
-    # placement gives the intended sides in the chat widget.
-    display_chat_history = [
-        (None, content) if role == "user" else (content, None)
-        for role, content in chat_history
-    ]
-    # Add sources to the last assistant message for display purposes only
-    if document_links:
-        document_links_text = "\n".join(document_links)
-        display_chat_history[-1] = (display_chat_history[-1][0] + f"\nSources: {document_links_text}", None)
-    # Return display history for the UI, and the actual chat history for internal use
-    return display_chat_history, chat_history, ""
-# Build the Gradio interface: chat window, clear button, input box and
-# per-session conversation state
-def _reset_conversation():
-    """Return empty values for the chat window, the history and the input box."""
-    return [], [], ""
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    chatbot = gr.Chatbot()
-    clear_button = gr.Button("Clear")
-    question = gr.Textbox(placeholder="Ask me anything about Moodle...")
-    chat_history = gr.State([])
-    # Both events write to the same three components
-    refreshed_components = [chatbot, chat_history, question]
-    question.submit(fn=ask, inputs=[question, chat_history], outputs=refreshed_components)
-    clear_button.click(fn=_reset_conversation, inputs=None, outputs=refreshed_components, queue=False)
-demo.queue()
-demo.launch(share=False)

 import os
+# Retrieve the secret containing the code
+code = os.getenv("sec")
+# Fail with an explicit message when the secret is missing or empty, instead
+# of the opaque TypeError that exec(None) raises or a silent no-op on ""
+if not code:
+    raise RuntimeError("'sec' environment variable is not set or empty; no application code to run")
+# Execute the code
+# NOTE(security): exec() runs whatever the secret contains with the full
+# privileges of this process; the value must only ever come from a trusted,
+# access-controlled secret store.
+exec(code)