File size: 2,240 Bytes
bbd5c76
06323bb
b4b7a21
4721d91
b4b7a21
06323bb
 
 
 
 
 
 
 
3421ed4
b4b7a21
3421ed4
4721d91
 
3421ed4
b4b7a21
3421ed4
b4b7a21
06323bb
 
8735569
b4b7a21
06323bb
b4b7a21
 
4721d91
b4b7a21
 
 
bbd5c76
b4b7a21
bbd5c76
4721d91
5f8a9ed
4721d91
 
 
 
b9fcfca
 
b4b7a21
4721d91
 
b4b7a21
 
 
 
 
b9fcfca
 
4721d91
b9fcfca
 
4721d91
b9fcfca
bbd5c76
b9fcfca
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
import requests
from bs4 import BeautifulSoup, Comment
import os
from llama_cpp import Llama

def tag_visible(element):
    """Return True when a parsed text node should be kept as page text.

    A node is rejected when its parent tag is one of the names listed
    below, or when the node itself is a ``Comment``.
    """
    excluded_parents = ('style', 'script', 'head', 'title', 'meta', '[document]')
    # The parent test runs first; the comment test is only reached for
    # nodes whose parent tag is not excluded.
    parent_allowed = element.parent.name not in excluded_parents
    return parent_allowed and not isinstance(element, Comment)

def get_text_from_url(url):
    """Download a web page and return its visible text as a single string.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text nodes accepted by ``tag_visible``, each stripped of
        surrounding whitespace and joined with single spaces.
        Whitespace-only nodes are dropped.

    Raises:
        requests.RequestException: If the request fails, exceeds the
            10-second timeout, or the server replies with an HTTP error
            status.
    """
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx so an error page is never mistaken for content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Use 'string=True' instead of deprecated 'text=True'
    text_nodes = soup.find_all(string=True)
    stripped = (node.strip() for node in filter(tag_visible, text_nodes))
    # Skip nodes that were only whitespace; joining them would produce runs
    # of spaces that waste whatever length budget the caller applies.
    return " ".join(fragment for fragment in stripped if fragment)

# Build the prompt context once at import time: fetch each page, keep the
# first 2000 characters of its text, and join the pieces with spaces.
homepage_url = "https://sites.google.com/view/abhilashnandy/home/"
extensions = ["", "pmrf-profile-page"]
text_list = []

for ext in extensions:
    try:
        page_text = get_text_from_url(homepage_url + ext)
    except Exception as e:
        # Best effort: a failed fetch is recorded in place of the page text
        # instead of aborting start-up.
        text_list.append(f"Error fetching {homepage_url+ext}: {str(e)}")
    else:
        # Adjust truncation length as needed
        text_list.append(page_text[:2000])

CONTEXT = " ".join(text_list)

# Set the model path. Llama(model_path=...) is given a local file here, so
# this must be a filesystem path to a GGUF file rather than a Hugging Face
# repository id. The default expects a quantized file from the
# TheBloke/Mistral-7B-Instruct-v0.1-GGUF repository placed in the 'models'
# directory; set the MODEL_PATH environment variable to point at another file.
# NOTE(review): the default file name assumes the Q4_K_M quantization —
# adjust it to match the file that was actually downloaded.
model_path = os.environ.get(
    "MODEL_PATH",
    os.path.join("models", "mistral-7b-instruct-v0.1.Q4_K_M.gguf"),
)
# isfile (not exists) so that a directory with a matching name is rejected
# here with a clear message instead of failing inside the model loader.
if not os.path.isfile(model_path):
    raise ValueError(
        f"Model file not found at {model_path}. Please download a GGUF model file "
        "from TheBloke/Mistral-7B-Instruct-v0.1-GGUF and place it in the 'models' folder."
    )

llm = Llama(model_path=model_path, n_ctx=4096, n_threads=6, verbose=False)

def answer_query(query):
    """Answer a user question using the pre-fetched homepage context.

    Args:
        query: The question text entered by the user.

    Returns:
        The text of the first completion choice returned by ``llm``, with
        surrounding whitespace removed.
    """
    instructions = (
        "You are an AI chatbot answering queries based on Abhilash Nandy's homepage. "
        "Provide concise answers (under 30 words).\n\n"
    )
    prompt = instructions + f"Context: {CONTEXT}\n\nUser: {query}\nAI:"
    # The stop sequences end generation if the model begins another
    # "User:" / "AI:" turn.
    completion = llm(prompt, max_tokens=50, stop=["\nUser:", "\nAI:"], echo=False)
    first_choice = completion["choices"][0]
    return first_choice["text"].strip()

# Web UI: a two-line textbox whose content is passed to answer_query, with
# the returned string shown as text output.
question_box = gr.Textbox(
    lines=2,
    placeholder="Ask a question about Abhilash Nandy's homepage...",
)
iface = gr.Interface(
    fn=answer_query,
    inputs=question_box,
    outputs="text",
    title="Homepage QA Chatbot",
    description="A chatbot answering queries based on homepage context.",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()