Spaces:
Sleeping
Sleeping
| import re | |
| import time | |
| import random | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# Optional: Enable scraping if your site is deployed.
ENABLE_SCRAPING = False
SITE_URL = "https://your-agri-future-site.com"

# Global variable to hold scraped content; stays empty when scraping is
# disabled or fails.
knowledge_base = ""

# --- Optional: Scraping Functionality ---
if ENABLE_SCRAPING:
    try:
        # Selenium is imported lazily so it is only required when scraping
        # is actually switched on.
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.common.by import By

        def scrape_site(url):
            """Return the text of the element with id "content" at *url*.

            If the element lookup fails, a string describing the error is
            returned instead of raising. Errors from starting the browser
            or loading the page propagate to the caller.
            """
            options = Options()
            # Pass the flag explicitly: the `headless` property setter is
            # deprecated and absent from newer Selenium 4 releases, where
            # assigning it would silently do nothing.
            options.add_argument("--headless")
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                # Use explicit waits in production; here we use a basic sleep.
                time.sleep(5)
                try:
                    # Customize the selector based on your site's HTML structure.
                    content_element = driver.find_element(By.ID, "content")
                    page_text = content_element.text
                except Exception as e:
                    page_text = "Error encountered during scraping: " + str(e)
            finally:
                # Always shut the browser down, even if loading the page
                # raised, so no Chrome process is leaked.
                driver.quit()
            return page_text

        knowledge_base = scrape_site(SITE_URL)
        print("Scraped knowledge base successfully.")
    except Exception as e:
        print("Scraping failed or Selenium is not configured:", e)
else:
    print("Scraping is disabled; proceeding without scraped site content.")
| # --- Multilingual Helpers --- | |
def is_greeting(query: str, lang: str) -> bool:
    """Return True if *query* opens with a known greeting for *lang*.

    Args:
        query: Raw user input; surrounding whitespace is ignored.
        lang: Language code ("en", "fr" or "am"). Unknown codes fall back
            to the English greeting list.

    For Latin-script languages the comparison is case-insensitive and the
    greeting must be a whole word, so "hi there" and "Hi!" match while
    "history of farming" does not. Amharic input uses plain prefix
    matching with no case conversion.
    """
    greetings = {
        "en": ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"],
        "fr": ["bonjour", "salut", "coucou", "bonsoir"],
        "am": ["ሰላም", "ሰላም እንደምን", "እንዴት"]
    }
    greet_list = greetings.get(lang, greetings["en"])
    query = query.strip()

    if lang == "am":
        # Ethiopic script has no letter case; prefix matching is kept as-is.
        return query.startswith(tuple(greet_list))

    query = query.lower()
    # Require a word boundary after the greeting: the character following
    # it (if any) must not be a letter or digit. An empty remainder gives
    # "".isalnum() == False, so an exact match still counts.
    return any(
        query.startswith(greet)
        and not query[len(greet):len(greet) + 1].isalnum()
        for greet in greet_list
    )
| # Rather than using fixed out-of-scope messages, use the model via Hugging Face to generate them. | |
def generate_dynamic_out_of_scope_message(language: str) -> str:
    """Ask the chat model for a fresh "out of scope" reply in *language*.

    Args:
        language: Language code ("en", "fr" or "am"). Unknown codes fall
            back to the English prompt.

    Returns:
        The generated message with surrounding whitespace removed. If the
        generated text cannot be read from the response, the string form
        of the whole response is returned instead.

    Relies on a module-level `client` exposing `chat_completion`, defined
    elsewhere in this file (presumably the huggingface_hub InferenceClient
    -- confirm). Errors raised by that call are not caught here.
    """
    # Language-specific system prompts for generating the out-of-scope message.
    system_prompts = {
        "en": (
            "You are a helpful chatbot specializing in agriculture and agro-investment. "
            "A user just asked a question that is not related to these topics. "
            "Generate a friendly, varied, and intelligent out-of-scope response in English that kindly encourages the user to ask about agriculture or agro-investment."
        ),
        "fr": (
            "Vous êtes un chatbot utile spécialisé dans l'agriculture et les investissements agroalimentaires. "
            "Un utilisateur vient de poser une question qui ne concerne pas ces sujets. "
            "Générez une réponse élégante, variée et intelligente en français pour indiquer que la question est hors de portée, en invitant l'utilisateur à poser une question sur l'agriculture ou les investissements agroalimentaires."
        ),
        "am": (
            "እርስዎ በግብርናና በአገልግሎት ስርዓተ-ቢዝነስ ውስጥ በተለይ የተሞሉ ቻትቦት ናቸው። "
            "ተጠቃሚው ለግብርና ወይም ለአገልግሎት ስርዓተ-ቢዝነስ ተያይዞ ያልሆነ ጥያቄ አስቀድመዋል። "
            "በአማርኛ በተለያዩ መልኩ የውጭ ክፍል መልእክት ፍጥረት ያድርጉ፤ እባኮትን ተጠቃሚውን ለግብርና ወይም ለአገልግሎት ጥያቄዎች ለመጠየቅ ያነጋግሩ።"
        )
    }
    prompt = system_prompts.get(language, system_prompts["en"])
    messages = [{"role": "system", "content": prompt}]

    # Call the model without streaming to generate the complete message.
    response = client.chat_completion(
        messages,
        max_tokens=80,
        stream=False,
        temperature=0.7,
        top_p=0.95,
    )

    # Depending on the client structure, the generated text may be missing:
    # the response can lack the expected attributes, carry an empty
    # `choices` list, or hold a None content.
    try:
        out_message = response.choices[0].message.content
    except (AttributeError, IndexError, TypeError):
        out_message = None

    # Fall back to the raw response so the `.strip()` below cannot fail on
    # a non-string value.
    if not isinstance(out_message, str):
        out_message = str(response)
    return out_message.strip()
| # A helper to determine domain relevance (basic implementation; can be expanded). | |
def is_domain_query(query: str) -> bool:
    """Return True if *query* mentions an agriculture/agro-investment keyword.

    Matching is case-insensitive and on whole words. A trailing "s" is
    accepted so simple plurals ("crops", "soils", "investments") count,
    while words that merely contain a keyword ("outcrops", "cropping")
    do not.
    """
    domain_keywords = [
        "agriculture", "farming", "crop", "agro", "investment", "soil",
        "irrigation", "harvest", "organic", "sustainable", "agribusiness",
        "livestock", "agroalimentaire", "agriculture durable"
    ]
    # One alternation instead of a search per keyword; re.escape keeps the
    # pattern valid if a keyword ever contains a regex metacharacter.
    alternatives = "|".join(re.escape(keyword) for keyword in domain_keywords)
    pattern = r"\b(?:" + alternatives + r")s?\b"
    return re.search(pattern, query, re.IGNORECASE) is not None
| def retrieve_relevant_snippet(query: str, text: str, max_length: int = 300) -> str: | |
| sentences = re.split(r'[.?!]', text) | |
| for sentence in sentences |