Jainish1808 committed on
Commit 0541d4e · 1 Parent(s): c38fcf7

Upload main.py

Files changed (1)
  1. main.py +311 -13
main.py CHANGED
@@ -1,6 +1,283 @@
+ # import os
+ # import json
+ # import torch
+ # from fastapi import FastAPI, Request, Form
+ # from fastapi.templating import Jinja2Templates
+ # from fastapi.responses import HTMLResponse
+ # from fastapi.staticfiles import StaticFiles
+ # from transformers import AutoTokenizer, AutoModelForCausalLM
+ # from peft import PeftModel, PeftConfig
+ # import logging
+
+ # # Setup logging
+ # logging.basicConfig(level=logging.INFO)
+ # logger = logging.getLogger(__name__)
+
+ # # Setup environment cache
+ # cache_dir = "/tmp/huggingface"
+ # os.makedirs(cache_dir, exist_ok=True)
+ # os.environ["HF_HOME"] = cache_dir
+ # os.environ["TRANSFORMERS_CACHE"] = cache_dir
+ # os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
+
+ # # FastAPI setup
+ # app = FastAPI(title="Jack Patel AI Assistant", description="Personal AI Assistant powered by Fine-tuned TinyLlama")
+ # templates = Jinja2Templates(directory="templates")
+
+ # # Create static directory if it doesn't exist
+ # os.makedirs("static", exist_ok=True)
+ # app.mount("/static", StaticFiles(directory="static"), name="static")
+
+ # # Global variables for model and tokenizer
+ # model = None
+ # tokenizer = None
+ # training_data = []
+
+ # def load_training_data():
+ # """Load training data from JSON file"""
+ # global training_data
+ # try:
+ # if os.path.exists("data.json"):
+ # with open("data.json", "r", encoding="utf-8") as f:
+ # training_data = json.load(f)
+ # logger.info(f"✅ Loaded {len(training_data)} training examples")
+ # else:
+ # logger.warning("⚠️ data.json not found, using empty training data")
+ # training_data = []
+ # except Exception as e:
+ # logger.error(f"❌ Error loading training data: {e}")
+ # training_data = []
+
+ # def load_model_and_tokenizer():
+ # """Load the model and tokenizer"""
+ # global model, tokenizer
+
+ # try:
+ # # Model paths
+ # base_model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ # lora_model_path = "lora_model"
+
+ # logger.info("🔄 Loading tokenizer...")
+
+ # # Try to load tokenizer from LoRA path first, then base model
+ # if os.path.exists(lora_model_path):
+ # try:
+ # tokenizer = AutoTokenizer.from_pretrained(lora_model_path)
+ # logger.info("✅ Tokenizer loaded from LoRA model")
+ # except:
+ # tokenizer = AutoTokenizer.from_pretrained(base_model_name, cache_dir=cache_dir)
+ # logger.info("✅ Tokenizer loaded from base model")
+ # else:
+ # tokenizer = AutoTokenizer.from_pretrained(base_model_name, cache_dir=cache_dir)
+ # logger.info("✅ Tokenizer loaded from base model")
+
+ # # Set pad token
+ # if tokenizer.pad_token is None:
+ # tokenizer.pad_token = tokenizer.eos_token
+
+ # logger.info("🔄 Loading model...")
+
+ # # Load base model
+ # base_model = AutoModelForCausalLM.from_pretrained(
+ # base_model_name,
+ # device_map="auto",
+ # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+ # cache_dir=cache_dir,
+ # trust_remote_code=True
+ # )
+
+ # # Try to load and merge LoRA model if it exists
+ # if os.path.exists(lora_model_path) and os.path.exists(os.path.join(lora_model_path, "adapter_config.json")):
+ # try:
+ # logger.info("🔄 Loading LoRA adapter...")
+ # peft_model = PeftModel.from_pretrained(base_model, lora_model_path)
+ # model = peft_model.merge_and_unload()
+ # logger.info("✅ LoRA model loaded and merged successfully")
+ # except Exception as e:
+ # logger.warning(f"⚠️ Could not load LoRA model: {e}, using base model")
+ # model = base_model
+ # else:
+ # logger.info("ℹ️ No LoRA model found, using base model")
+ # model = base_model
+
+ # model.eval()
+ # logger.info("✅ Model loaded successfully")
+
+ # # Print device info
+ # device = next(model.parameters()).device
+ # logger.info(f"🖥️ Model running on: {device}")
+
+ # except Exception as e:
+ # logger.error(f"❌ Model loading error: {e}")
+ # raise
+
+ # def format_prompt(instruction: str) -> str:
+ # """Format the instruction as a proper prompt"""
+ # return f"""<|system|>
+ # You are Jack Patel's personal AI assistant. Answer questions about Jack Patel based on the information you've been trained on. Be friendly, helpful, and accurate.
+
+ # <|user|>
+ # {instruction}
+
+ # <|assistant|>
+ # """
+
+ # def find_similar_question(question: str) -> str:
+ # """Find similar question in training data and return answer"""
+ # question_lower = question.lower().strip()
+
+ # # Direct match
+ # for item in training_data:
+ # if item["question"].lower().strip() == question_lower:
+ # return item["answer"]
+
+ # # Partial match
+ # for item in training_data:
+ # if any(word in item["question"].lower() for word in question_lower.split() if len(word) > 2):
+ # return item["answer"]
+
+ # return None
+
+ # def generate_response(instruction: str) -> str:
+ # """Generate response from the model"""
+ # if model is None or tokenizer is None:
+ # return "Model not loaded. Please try again later."
+
+ # try:
+ # # First try to find answer in training data
+ # similar_answer = find_similar_question(instruction)
+ # if similar_answer:
+ # return similar_answer
+
+ # # If not found, use the model
+ # prompt = format_prompt(instruction)
+
+ # inputs = tokenizer(
+ # prompt,
+ # return_tensors="pt",
+ # truncation=True,
+ # max_length=512
+ # )
+
+ # # Move inputs to same device as model
+ # device = next(model.parameters()).device
+ # inputs = {k: v.to(device) for k, v in inputs.items()}
+
+ # with torch.no_grad():
+ # outputs = model.generate(
+ # **inputs,
+ # max_new_tokens=150,
+ # temperature=0.7,
+ # top_p=0.9,
+ # do_sample=True,
+ # pad_token_id=tokenizer.eos_token_id,
+ # eos_token_id=tokenizer.eos_token_id,
+ # repetition_penalty=1.1
+ # )
+
+ # # Decode the response
+ # full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ # # Extract only the assistant's response
+ # if "<|assistant|>" in full_response:
+ # response = full_response.split("<|assistant|>")[-1].strip()
+ # else:
+ # response = full_response.replace(prompt, "").strip()
+
+ # # Clean up the response
+ # response = response.replace("<|user|>", "").replace("<|system|>", "").strip()
+
+ # return response if response else "I'm sorry, I couldn't generate a proper response. Please try asking differently."
+
+ # except Exception as e:
+ # logger.error(f"❌ Generation error: {e}")
+ # return f"Sorry, I encountered an error while generating the response: {str(e)}"
+
+ # # Load everything on startup
+ # @app.on_event("startup")
+ # async def startup_event():
+ # """Load model and data on startup"""
+ # logger.info("🚀 Starting up...")
+ # load_training_data()
+ # load_model_and_tokenizer()
+ # logger.info("✅ Startup complete!")
+
+ # @app.on_event("shutdown")
+ # async def shutdown_event():
+ # """Cleanup on shutdown"""
+ # global model, tokenizer
+ # logger.info("🛑 Shutting down...")
+ # if model is not None:
+ # del model
+ # if tokenizer is not None:
+ # del tokenizer
+ # torch.cuda.empty_cache() if torch.cuda.is_available() else None
+ # logger.info("✅ Shutdown complete!")
+
+ # # Routes
+ # @app.get("/", response_class=HTMLResponse)
+ # async def read_index(request: Request):
+ # """Homepage"""
+ # return templates.TemplateResponse("index.html", {
+ # "request": request,
+ # "result": "",
+ # "instruction": "",
+ # "data_count": len(training_data)
+ # })
+
+ # @app.post("/", response_class=HTMLResponse)
+ # async def generate_output(request: Request, instruction: str = Form(...)):
+ # """Generate response for user input"""
+ # if not instruction.strip():
+ # return templates.TemplateResponse("index.html", {
+ # "request": request,
+ # "result": "Please enter a question or instruction.",
+ # "instruction": instruction,
+ # "data_count": len(training_data)
+ # })
+
+ # logger.info(f"🤖 Generating response for: {instruction}")
+ # response = generate_response(instruction)
+
+ # return templates.TemplateResponse("index.html", {
+ # "request": request,
+ # "result": response,
+ # "instruction": instruction,
+ # "data_count": len(training_data)
+ # })
+
+ # @app.get("/api/generate")
+ # async def api_generate(instruction: str):
+ # """API endpoint for generating responses"""
+ # if not instruction.strip():
+ # return {"error": "Please provide an instruction"}
+
+ # response = generate_response(instruction)
+ # return {"instruction": instruction, "response": response}
+
+ # @app.get("/health")
+ # async def health_check():
+ # """Health check endpoint"""
+ # return {
+ # "status": "healthy",
+ # "model_loaded": model is not None,
+ # "tokenizer_loaded": tokenizer is not None,
+ # "training_data_count": len(training_data),
+ # "device": str(next(model.parameters()).device) if model else "unknown"
+ # }
+
+ # if __name__ == "__main__":
+ # import uvicorn
+ # uvicorn.run(app, host="0.0.0.0", port=7860)
+
+
+
+
+
  import os
  import json
  import torch
+ from contextlib import asynccontextmanager
  from fastapi import FastAPI, Request, Form
  from fastapi.templating import Jinja2Templates
  from fastapi.responses import HTMLResponse
@@ -20,14 +297,6 @@ os.environ["HF_HOME"] = cache_dir
  os.environ["TRANSFORMERS_CACHE"] = cache_dir
  os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir

- # FastAPI setup
- app = FastAPI(title="Jack Patel AI Assistant", description="Personal AI Assistant powered by Fine-tuned TinyLlama")
- templates = Jinja2Templates(directory="templates")
-
- # Create static directory if it doesn't exist
- os.makedirs("static", exist_ok=True)
- app.mount("/static", StaticFiles(directory="static"), name="static")
-
  # Global variables for model and tokenizer
  model = None
  tokenizer = None
@@ -64,7 +333,7 @@ def load_model_and_tokenizer():
              try:
                  tokenizer = AutoTokenizer.from_pretrained(lora_model_path)
                  logger.info("✅ Tokenizer loaded from LoRA model")
-             except:
+             except Exception:
                  tokenizer = AutoTokenizer.from_pretrained(base_model_name, cache_dir=cache_dir)
                  logger.info("✅ Tokenizer loaded from base model")
          else:
@@ -193,8 +462,6 @@ def generate_response(instruction: str) -> str:
          logger.error(f"❌ Generation error: {e}")
          return f"Sorry, I encountered an error while generating the response: {str(e)}"

- # Load everything on startup
- @app.on_event("startup")
  async def startup_event():
      """Load model and data on startup"""
      logger.info("🚀 Starting up...")
@@ -202,7 +469,6 @@ async def startup_event():
      load_model_and_tokenizer()
      logger.info("✅ Startup complete!")

- @app.on_event("shutdown")
  async def shutdown_event():
      """Cleanup on shutdown"""
      global model, tokenizer
@@ -211,9 +477,32 @@ async def shutdown_event():
          del model
      if tokenizer is not None:
          del tokenizer
-     torch.cuda.empty_cache() if torch.cuda.is_available() else None
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
      logger.info("✅ Shutdown complete!")

+ # Modern lifespan event handler
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     # Startup
+     await startup_event()
+     yield
+     # Shutdown
+     await shutdown_event()
+
+ # FastAPI setup with lifespan
+ app = FastAPI(
+     title="Jack Patel AI Assistant",
+     description="Personal AI Assistant powered by Fine-tuned TinyLlama",
+     lifespan=lifespan
+ )
+
+ templates = Jinja2Templates(directory="templates")
+
+ # Create static directory if it doesn't exist
+ os.makedirs("static", exist_ok=True)
+ app.mount("/static", StaticFiles(directory="static"), name="static")
+
  # Routes
  @app.get("/", response_class=HTMLResponse)
  async def read_index(request: Request):
@@ -255,6 +544,15 @@ async def api_generate(instruction: str):
      response = generate_response(instruction)
      return {"instruction": instruction, "response": response}

+ @app.get("/api/questions")
+ async def get_questions():
+     """API endpoint to get available questions"""
+     return {
+         "questions": [item["question"] for item in training_data[:10]],  # First 10 questions
+         "total_count": len(training_data),
+         "status": "available"
+     }
+
  @app.get("/health")
  async def health_check():
      """Health check endpoint"""