import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gc

# Global variables for the lazily loaded model and tokenizer
model = None
tokenizer = None


def load_model():
    """Load the model and tokenizer from Hugging Face with CPU optimizations."""
    global model, tokenizer
    if model is None or tokenizer is None:
        try:
            model_name = "Harshu0117/Materials_IISC_MRC"

            # Load tokenizer
            tokenizer = AutoTokenizer.from_pretrained(model_name)

            # Load model with CPU optimizations
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                device_map="cpu",
                trust_remote_code=True,
                low_cpu_mem_usage=True,    # Reduce peak memory during loading
                offload_folder="offload",  # Allow weights to spill to disk if needed
            )

            # Switch to inference mode (disables dropout, etc.)
            model.eval()

            # Set pad token if the tokenizer does not define one
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token

            # Clear GPU cache if a GPU happens to be present
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            # Force garbage collection
            gc.collect()

            return "✅ Model loaded successfully with CPU optimizations!"
        except Exception as e:
            return f"❌ Error loading model: {str(e)}"
    return "✅ Model already loaded!"


def generate_response(prompt, max_tokens, temperature, top_p, repetition_penalty):
    """Generate a response from the loaded model with CPU-friendly settings."""
    global model, tokenizer

    # Load model if not already loaded
    if model is None or tokenizer is None:
        load_result = load_model()
        if "Error" in load_result:
            return load_result

    if not prompt.strip():
        return "⚠️ Please enter a question or topic first!"

    try:
        # Tokenize input with truncation for faster processing
        inputs = tokenizer(
            prompt.strip(),
            return_tensors="pt",
            truncation=True,
            max_length=512,  # Reduced from 1024 for faster CPU processing
            padding=True,
        )

        # Keep tensors on the CPU
        inputs = inputs.to("cpu")

        # Generate with sampling; a single beam keeps decoding fast on CPU
        # (early_stopping is omitted because it only applies to beam search)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(max_tokens),
                temperature=float(temperature),
                top_p=float(top_p),
                repetition_penalty=float(repetition_penalty),
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                use_cache=True,
                num_beams=1,
            )

        # Decode only the newly generated tokens; slicing by prompt length is
        # more robust than string-replacing the prompt out of the full output
        prompt_length = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True,
        ).strip()

        # Clear memory
        del outputs
        gc.collect()

        return response
    except Exception as e:
        return f"❌ Error generating response: {str(e)}"


# Create Gradio interface
def create_interface():
    # Custom CSS for styling
    css = """
    .gradio-container {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    .gr-button-primary {
        background: linear-gradient(45deg, #FF6B6B, #4ECDC4) !important;
        border: none !important;
        border-radius: 25px !important;
        color: white !important;
        font-weight: bold !important;
        padding: 12px 24px !important;
        font-size: 16px !important;
        transition: all 0.3s ease !important;
    }
    .gr-button-primary:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2) !important;
    }
    .gr-textbox {
        border-radius: 15px !important;
        border: 2px solid #e0e0e0 !important;
        background: rgba(255, 255, 255, 0.95) !important;
    }
    .gr-textbox:focus {
        border-color: #4ECDC4 !important;
        box-shadow: 0 0 10px rgba(78, 205, 196, 0.3) !important;
    }
    .output-text {
        background: rgba(255, 255, 255, 0.95) !important;
        border-radius: 15px !important;
        padding: 20px !important;
        margin: 10px 0 !important;
        border-left: 4px solid #4ECDC4 !important;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
    }
    .gr-accordion {
        background: rgba(255, 255, 255, 0.1) !important;
        border-radius: 15px !important;
        border: 1px solid rgba(255, 255, 255, 0.3) !important;
    }
    """

    # Create interface
    with gr.Blocks(
        css=css,
        title="🧪 Materials Science AI Assistant",
        theme=gr.themes.Soft(
            primary_hue="blue",
            secondary_hue="cyan",
            neutral_hue="slate",
        ),
    ) as demo:
        # Header (the original markup was lost; this minimal wrapper keeps
        # the surviving header text)
        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <p>Powered by Fine-tuned LLaMA 3 8B | Specialized in Materials Research</p>
            <p>🔬 Specialized in Materials Science | 🧪 MAX Phases & MXenes Expert</p>
        </div>
        """)
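
        # --- Main UI (reconstruction) ---
        # The original source is truncated here; these controls are a minimal
        # sketch inferred from generate_response()'s signature (max_tokens,
        # temperature, top_p, repetition_penalty). Labels, ranges, and default
        # values are assumptions, not the original interface.
        with gr.Row():
            with gr.Column():
                prompt_box = gr.Textbox(
                    label="Your question",
                    placeholder="Ask about MAX phases, MXenes, or other materials science topics...",
                    lines=4,
                )
                with gr.Accordion("⚙️ Generation Settings", open=False):
                    max_tokens = gr.Slider(16, 512, value=256, step=16, label="Max new tokens")
                    temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
                    top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
                    repetition_penalty = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition penalty")
                generate_btn = gr.Button("🚀 Generate", variant="primary")
            with gr.Column():
                output_box = gr.Textbox(
                    label="Response",
                    lines=12,
                    elem_classes=["output-text"],
                )

        # Wire the button to the generation function
        generate_btn.click(
            fn=generate_response,
            inputs=[prompt_box, max_tokens, temperature, top_p, repetition_penalty],
            outputs=output_box,
        )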
        # Footer (markup reconstructed around the surviving text)
        gr.HTML("""
        <div style="text-align: center; padding: 10px;">
            <p>Built with ❤️ using Gradio & Hugging Face Spaces</p>
        </div>
        """)
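
    # The source was truncated before the interface was returned or launched;
    # returning the Blocks object and the entry point below are an assumed,
    # standard completion for a Gradio app on Hugging Face Spaces.
    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()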