from flask import Flask, request, render_template
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re

app = Flask(__name__)

# Load model and tokenizer from a local directory and run on CPU
model_path = "./tinyllama_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
device = torch.device("cpu")
model.to(device)

# Load simple "key: value" knowledge base; fall back to built-in facts if the file is missing
knowledge_base = {}
try:
    with open("knowledge_base.txt", "r") as f:
        for line in f:
            if ":" in line:
                key, value = line.strip().split(":", 1)
                knowledge_base[key.lower()] = value.strip()
except FileNotFoundError:
    knowledge_base = {
        "ceo of meta": "Mark Zuckerberg",
        "founder of meta": "Mark Zuckerberg",
        "meta founding year": "2004"
    }


@app.route('/')
def home():
    return render_template('index.html')


@app.route('/generate', methods=['POST'])
def generate():
    user_input = request.form['prompt'].strip().lower()

    # Check knowledge base for factual answers first
    response = None
    for key in knowledge_base:
        if key in user_input:
            response = knowledge_base[key]
            break

    if not response:
        # Create chat prompt template around the user input
        prompt = (
            "<|SYSTEM|> You are a helpful assistant providing accurate and concise answers. "
            "Avoid fabricating details or repeating user input unnecessarily. If unsure, say so. "
            f"<|USER|> {user_input} <|ASSISTANT|> "
        )
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        # max_new_tokens bounds only the generated continuation; max_length would also
        # count the (fairly long) prompt tokens and leave little or no room for the answer
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id
        )
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Clean response: keep only the text after the assistant marker, collapse whitespace
        response = generated_text.split("<|ASSISTANT|> ")[-1] if "<|ASSISTANT|> " in generated_text else generated_text
        response = re.sub(r"\s+", " ", response).strip()

        # Avoid repetitive or irrelevant output that merely echoes the prompt
        if user_input in response.lower() and len(response.split()) < 10:
            response = "I'm not sure how to respond to that. Could you clarify or ask something else?"

    return response


if __name__ == '__main__':
    app.run(debug=True)
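
# --- Usage notes (a sketch, not executed by the app) ---------------------------
# knowledge_base.txt is assumed to hold one "key: value" pair per line, e.g.:
#     ceo of meta: Mark Zuckerberg
#     meta founding year: 2004
# Keys are matched case-insensitively as substrings of the incoming prompt.
#
# A minimal smoke test with Flask's built-in test client (assumes the model
# files actually exist under ./tinyllama_model so the module imports cleanly):
#     client = app.test_client()
#     resp = client.post('/generate', data={'prompt': 'Who is the CEO of Meta?'})
#     print(resp.get_data(as_text=True))  # answered from the knowledge base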