# Spaces: tferhan / dtgv2 (Sleeping)
# File size: 7,832 Bytes

from langchain.prompts import StringPromptTemplate
import re
from document_scrapped import get_data
from qa_txt import conversation_chain
# from key_extract import chain
from langchain_core.prompts import PromptTemplate
from bs4 import BeautifulSoup
import requests
from data_process import *
from langchain.tools.base import StructuredTool
from langchain.agents import initialize_agent
from qa_txt import llm
from trans import trans
import pathlib
from pydantic import BaseModel
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

# import threading, time
from langchain.agents import (
    create_react_agent,
    AgentExecutor,
    tool,
)
from langchain import hub
import os 

# FastAPI application object; routes and middleware in this module are registered on it.
app = FastAPI(
    title="Data Gov MA API",
    description="API for Data Gov MA",
    version="1.0.0",
)


class Generate(BaseModel):
    # Schema used as the response model of the /api/generate route:
    # a single string field carrying the generated answer.
    text: str


# Reference text passed to the FAQ chain as its ``document`` input.
# The encoding is pinned so non-ASCII content decodes identically on every
# platform instead of depending on the locale default.
# NOTE(review): assumes intents_v2.txt is stored as UTF-8 — confirm.
file_text = pathlib.Path('intents_v2.txt').read_text(encoding="utf-8")

# ReAct prompt pulled from the LangChain hub when the module is imported.
prompt = hub.pull("hwchase17/react")

def faq(query: str) -> str:
    # Forward the user's question, together with the module-level reference
    # text, to the conversation chain and return whatever the chain produces.
    chain_inputs = {"input": query, "document": file_text}
    return conversation_chain.invoke(chain_inputs)

# Agent tool wrapping ``faq`` for general questions about the website.
# The description is handed to StructuredTool verbatim; do not reformat it.
qa_faq = StructuredTool.from_function(
    func=faq,
    description="""
    Respond to general questions about the website like the documentation, contact, utility, support... Don't use it when the user request data about a subject (economie, justice, water, or any type of public dataset) only for contact or useful links data.

    Parameters :
    - query (string) : the same input as the user input no more no less and dont translate it even if it is in another language.

    Returns :
    - string : the output as returned from the function in french.
    """,
)

# Agent tool wrapping ``get_data``; ``return_direct`` is enabled for this tool.
# The description is handed to StructuredTool verbatim; do not reformat it.
analyze_data = StructuredTool.from_function(
    func=get_data,
    return_direct=True,
    description="""
    Analyze and summarize data from a given url asked by the user to summarize or analyze dont use it with the tool request_data and the user must write analyze before this function get executed.
    Parameters : 
    - url (string) : the url given by the user.
    Returns :
    - string : Once the data is outputed from the function it get summarized and returned to the user and it must be in french.
    Example:
    >>> analyse this data of this link : https://data.gov.ma/data/fr/dataset/evolution-des-affaires-au-cours-du-quinquennat-2018-2022
    >>> input : https://data.gov.ma/data/fr/dataset/evolution-des-affaires-au-cours-du-quinquennat-2018-2022
    """,
)

def request_data(query: str) -> str:
    # Search for datasets using the first word of the cleaned query.
    #
    # query: keyword text supplied by the agent.
    # Returns the formatted search result when the first element of the
    # search output is non-empty; otherwise returns ``query`` unchanged.
    keywords = nettoyer_string(query).split()
    if not keywords:
        # Cleaning left no words, so there is nothing to search for. Indexing
        # the first word would raise IndexError; use the same fallback as an
        # empty search result instead.
        return query

    results = chercher_data(keywords[0])
    if len(results[0]):
        return format_reponse(results)
    return query

# Agent tool wrapping ``request_data``; ``return_direct`` is enabled for this tool.
# The description is handed to StructuredTool verbatim; do not reformat it.
fetch_data = StructuredTool.from_function(
    func=request_data,
    return_direct=True,
    description="""
    Request and fetch data using a search keyword.
    Parameters :
    - query (string) : A keyword in french about the subject of what to user is looking for, it must be always be in french and a noun if not convert it.For example if the user inputed "I want data about water" you need to input water in french which is "eau" same for other languages and the words translatted must be nouns not adjectives or verbs also the user may request data about an organization where you need to take just the main subject for example "Je veux les données de l'agence de développement digitale" you take just "développement".
    Returns :
    - string : the output as returned from the function in french , includes the link to all the data about the keyword along with an example.
    """,
)
# def request_data(query: str) -> str:
#     request = chain.invoke({"input": query})
#     mot_cle = nettoyer_string(request)
#     mots = mot_cle.split()
#     ui = mots[0]
#     rg = chercher_data(ui)
#     if len(rg[0]):
#       reponse_final = format_reponse(rg)
#       return reponse_final
#     else:
#       return "Désolé, il semble que nous n'ayons pas de données correspondant à votre demande pour le moment. Avez-vous une autre question ou avez-vous besoin d'aide sur quelque chose d'autre?"

# fetch_data = StructuredTool.from_function(
#     func=request_data,
#     description="""
#     Request and fetch data using a search keyword.

#     Parameters :
#     - query (string) : the same input as the user input no more no less and always it must be in french if it isn't already. For example : "give me data about health" the input is health in french which is santé, same for other languages and the words translatted must be nouns not adjectives or verbs also the user may request data about an organization where you need to take just the main subject for example "Je veux les données de l'agence de développement digitale" you take just "développement".
#     Returns :
#     - string : the output as returned from the function in french , includes the link to all the data about the keyword along with an example.
#     """,
# )

def translate(query: str) -> str:
    # Run the text through the ``trans`` chain and return its output as-is.
    chain_inputs = {"input": query}
    return trans.invoke(chain_inputs)

# Agent tool wrapping ``translate``.
# The description is handed to StructuredTool verbatim; do not reformat it.
translate_text = StructuredTool.from_function(
    func=translate,
    description="""
    Translate from any language to french. Don't use it if the text is already in french. Use it only with the function request_data don't use it with the other tools

    Parameters :
    - query (string) : the same input as the user input no more no less only if it isnt in french already.
    Returns :
    - string : isolate just the translated text in french with no other useless words.
    """,
)



# template = '''Answer the following questions as best you can. You have access to the following tools:

# {tools}

# Use the following format:

# Question: the input question you must answer and must be in french if not translate it in french
# Thought: you should always think about what to do
# Action: the action to take, should be one of [{tool_names}]
# Action Input: the input to the action
# Observation: the result of the action, don't include /nObservation in the end of each observation just what you observed
# ... (this Thought/Action/Action Input/Observation can repeat N times) 
# Thought: I now know the final answer
# Final Answer: the final answer to the original input question and must always be in french no matter what.

# Begin!

# Question: {input}
# Thought:{agent_scratchpad}'''

# prompt_2 = PromptTemplate.from_template(template)

# Tools exposed to the agent, in the order they are listed to it.
tools_add = [qa_faq, fetch_data, analyze_data, translate_text]

# ReAct agent built from the shared LLM, the tool list and the hub prompt.
agent = create_react_agent(
    llm=llm,
    tools=tools_add,
    prompt=prompt,
)

# Executor that runs the agent: capped at 10 iterations, verbose logging on,
# and parsing-error handling enabled.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools_add,
    handle_parsing_errors=True,
    max_iterations=10,
    verbose=True,
    # max_execution_time=45,  # optional, but useful in deployment
)

def data_gov_ma(message, history=None):
    """Run the agent on a user message and wrap the answer for the API.

    Args:
        message: User text forwarded to the agent executor as ``input``.
        history: Not used by this function; accepted for interface
            compatibility. Defaults to ``None`` instead of a shared mutable
            list.

    Returns:
        A dict ``{"text": ...}`` holding the agent's ``output``. When the
        agent stops on its iteration/time limit, or the executor raises
        ``ValueError``, the text is a French "please rephrase" message.
    """
    fallback = {"text":"Je suis désolé, je n'ai pas compris votre question.Pourriez-vous la reformuler s'il vous plaît ?"}
    # Sentinel output the executor returns when it gives up on a request.
    timeout_iteration_error = 'Agent stopped due to iteration limit or time limit.'

    try:
        response = agent_executor.invoke({"input": message})
    except ValueError:
        return fallback

    final_response = response['output']
    if final_response == timeout_iteration_error:
        return fallback
    return {"text": final_response}

# CORS policy: any origin, method and header is accepted, with credentials allowed.
# NOTE(review): FastAPI's CORS documentation advises against combining wildcard
# values with allow_credentials=True — confirm whether credentials are needed.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/", tags=["Home"])
def api_home():
    # Root route: returns a fixed welcome payload.
    welcome = {"detail": "Welcome to FastAPI TextGen Tutorial!"}
    return welcome

@app.post(
    "/api/generate",
    summary="Generate text from prompt",
    tags=["Generate"],
    response_model=Generate,
)
def inference(input_prompt: str):
    # Pass the prompt to the agent wrapper; its {"text": ...} dict is the
    # response body, validated against the Generate model.
    answer = data_gov_ma(message=input_prompt)
    return answer