Jainish1808 committed · Commit c5c6aed · 1 Parent(s): 46a03f3

Uploaded 21-06 (7)

Browse files:
- main.py  +75 -80
- templates/index.html  +3 -18
main.py
CHANGED
@@ -1,127 +1,122 @@
 import os
 import torch
-from fastapi import FastAPI,
-from fastapi.templating import Jinja2Templates
+from fastapi import FastAPI, Form, Request
 from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel
-from pathlib import Path
-
-# Set up Hugging Face cache directories
-cache_dir = "/tmp/huggingface"
-offload_dir = os.path.join(cache_dir, "offload")
-os.makedirs(cache_dir, exist_ok=True)
-os.makedirs(offload_dir, exist_ok=True)
-os.environ["HF_HOME"] = cache_dir
-os.environ["TRANSFORMERS_CACHE"] = cache_dir
-os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
-
-# FastAPI setup
-app = FastAPI()
-templates = Jinja2Templates(directory="templates")
 
-#
+# Paths
 BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+LORA_MODEL_DIR = "./lora_model"
+QLORA_MODEL_DIR = "./Qlora_model"
+ADALORA_MODEL_DIR = "./adalora_model"
+cache_dir = "./cache"
 
-#
+# Prompt Template
 PROMPT_TEMPLATE = """<|system|>
-You are Jack Patel. Answer questions about yourself using only information you were trained on. If you don't know something specific about yourself, say "I don't have that information."
-If the user's question is not about Jack Patel, answer as an AI assistant using your general knowledge
+You are Jack Patel. Answer questions about yourself using only information you were trained on. If you don't know something specific about yourself, say "I don't have that information."
+If the user's question is not about Jack Patel, answer as an AI assistant using your general knowledge.
+Always respond in 2 to 3 short sentences.
 <|user|>
 {prompt}
 <|assistant|>
 """
 
-
-
-
-
-
+app = FastAPI()
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")
+
+# Global cache to avoid reloading models
+model_cache = {}
+
+def load_model(adapter_path):
+    if adapter_path in model_cache:
+        return model_cache[adapter_path]
 
+    print(f"🔄 Loading model from: {adapter_path}")
+    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
     tokenizer.pad_token = tokenizer.eos_token
 
     base = AutoModelForCausalLM.from_pretrained(
-
+        BASE_MODEL,
         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-
-        cache_dir=cache_dir
+        cache_dir=cache_dir,
     )
-
-    model = PeftModel.from_pretrained(base, lora_path)
-    model = model.merge_and_unload()
+    model = PeftModel.from_pretrained(base, adapter_path)
    model.to("cuda" if torch.cuda.is_available() else "cpu").eval()
 
+    model_cache[adapter_path] = (tokenizer, model)
     return tokenizer, model
 
-# Load LoRA and QLoRA models
-try:
-    TOKENIZER_LORA, MODEL_LORA = load_model(BASE_MODEL, "lora_model")
-    TOKENIZER_QLORA, MODEL_QLORA = load_model(BASE_MODEL, "Qlora_model")
-except Exception as e:
-    print(f"Model loading failed: {e}")
-    exit(1)
-
 def generate_response(prompt, tokenizer, model):
     full_prompt = PROMPT_TEMPLATE.format(prompt=prompt)
     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
-
     with torch.no_grad():
-
+        output = model.generate(
             **inputs,
-            max_new_tokens=
-            temperature=0.
+            max_new_tokens=50,
+            temperature=0.7,
             top_p=0.9,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id,
             eos_token_id=tokenizer.eos_token_id,
             repetition_penalty=1.1
         )
-
-
-    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Extract only the assistant's response
-    if "<|assistant|>" in full_response:
-        response = full_response.split("<|assistant|>")[-1].strip()
-    else:
-        response = full_response.split("### Response:")[-1].strip() if "### Response:" in full_response else full_response
-
-    # Clean up any remaining artifacts
-    response = response.replace("<|user|>", "").replace("<|system|>", "").strip()
-
-    return response
+    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
+    return decoded.split("<|assistant|>")[-1].strip() if "<|assistant|>" in decoded else decoded.strip()
 
 @app.get("/", response_class=HTMLResponse)
-def
+async def form_get(request: Request):
     return templates.TemplateResponse("index.html", {
         "request": request,
-        "
+        "result": None,
+        "model": "",
         "prompt": "",
-        "
-        "model": ""
+        "data_count": 0
     })
 
 @app.post("/", response_class=HTMLResponse)
-async def
-
-
-
-
-
-
-
-
-
+async def form_post(
+    request: Request,
+    prompt: str = Form(...),
+    model_type: str = Form(...)
+):
+    model_paths = {
+        "lora": LORA_MODEL_DIR,
+        "Qlora1": QLORA_MODEL_DIR,
+        "adalora": ADALORA_MODEL_DIR
+    }
+
+    model_labels = {
+        "lora": "LoRA - lora-tinyllama-final",
+        "Qlora1": "QLoRA - lora-tinyllama-final1",
+        "adalora": "AdaLoRA - adalora-tinyllama-final"
+    }
+
+    adapter_path = model_paths.get(model_type)
+    model_label = model_labels.get(model_type, model_type.upper())
+
+    if not adapter_path or not os.path.exists(adapter_path):
+        return templates.TemplateResponse("index.html", {
+            "request": request,
+            "result": "Invalid or missing model selected.",
+            "model": model_label,
+            "prompt": prompt,
+            "data_count": 0
+        })
+
+    try:
+        tokenizer, model = load_model(adapter_path)
+        result = generate_response(prompt, tokenizer, model)
+    except Exception as e:
+        result = f"Error generating response: {str(e)}"
 
     return templates.TemplateResponse("index.html", {
         "request": request,
-        "
+        "result": result,
+        "model": model_label,
         "prompt": prompt,
-        "
-
-    })
-
-# Run server
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+        "data_count": 0  # Replace with real data count if available
+    })
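Note on the reworked main.py: the commit drops the eager try/except model loading and the old "if __name__ == '__main__': uvicorn.run(...)" block in favor of lazy, per-adapter loading through model_cache, so the server is now started by the Space runtime rather than by main.py itself. Below is a minimal sketch (hypothetical, not part of this commit) of exercising the new form endpoint locally; it assumes the app is served on port 7860, as in the removed block, and that the requests package is installed.

# Hypothetical local check, not part of this commit.
# Start the server first, e.g. the equivalent of the removed __main__ block:
#     uvicorn main:app --host 0.0.0.0 --port 7860
import requests

resp = requests.post(
    "http://localhost:7860/",
    # model_type must be one of the keys in model_paths: "lora", "Qlora1", "adalora"
    data={"prompt": "Who is Jack Patel?", "model_type": "lora"},
)
print(resp.status_code)                   # 200: the rendered index.html comes back
print("response-container" in resp.text)  # True once a result block is embedded in the page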
templates/index.html
CHANGED
@@ -5,7 +5,7 @@
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Jack Patel AI Assistant</title>
    <style>
-        /* Same CSS as
+        /* Same CSS as before — unchanged */
        * {
            margin: 0;
            padding: 0;
@@ -143,6 +143,7 @@
        .suggestions-dropdown {
            position: absolute;
            top: 100%;
+            ;
            left: 0;
            right: 0;
            background: white;
@@ -289,17 +290,15 @@
            </div>
            {% endif %}
        </div>
-
        <!-- Model Selection Dropdown -->
        <div class="model-select-container">
            <label for="modelSelect">Choose a model:</label>
            <select id="modelSelect" name="model_type" class="model-select">
                <option value="lora">LoRA - lora-tinyllama-final</option>
-
+                <option value="adalora">AdaLoRA - adalora-tinyllama-final</option>
                <option value="Qlora1">QLoRA - lora-tinyllama-final1</option>
            </select>
        </div>
-
        <form method="post" id="questionForm">
            <div class="chat-input-container">
                <div class="input-wrapper">
@@ -316,10 +315,8 @@
                        </svg>
                    </button>
                </div>
-                <div class="suggestions-dropdown" id="suggestionsDropdown"></div>
            </div>
        </form>
-
        <div class="loading" id="loadingDiv">
            <div class="loading-dots">
                <div class="loading-dot"></div>
@@ -328,7 +325,6 @@
            </div>
            <p style="margin-top: 1rem; color: #6b7280;">Generating response...</p>
        </div>
-
        {% if result %}
        <div class="response-container">
            <div class="response-header">
@@ -341,7 +337,6 @@
            <div class="response-text">{{ result }}</div>
        </div>
        {% endif %}
-
        <div class="example-questions">
            <h3>Try asking:</h3>
            <div class="example-grid">
@@ -360,41 +355,32 @@
                </div>
            </div>
        </div>
-
        <div class="footer">
            <p>Powered by TinyLlama and Hugging Face</p>
        </div>
-
        <script>
            function fillQuestion(question) {
                document.getElementById('instruction').value = question;
                document.getElementById('instruction').focus();
            }
-
            document.getElementById('questionForm').addEventListener('submit', async function(e) {
                e.preventDefault();
-
                const textarea = document.getElementById('instruction');
                const modelType = document.getElementById('modelSelect').value;
                const submitBtn = document.getElementById('submitBtn');
                const loadingDiv = document.getElementById('loadingDiv');
-
                const prompt = textarea.value.trim();
                if (!prompt) return;
-
                loadingDiv.classList.add('show');
                submitBtn.disabled = true;
-
                const formData = new FormData();
                formData.append('prompt', prompt);
                formData.append('model_type', modelType);
-
                try {
                    const response = await fetch("/", {
                        method: "POST",
                        body: formData
                    });
-
                    const html = await response.text();
                    document.open();
                    document.write(html);
@@ -406,6 +392,5 @@
                }
            });
        </script>
-
</body>
</html>
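The new "adalora" dropdown option added above maps to ADALORA_MODEL_DIR ("./adalora_model") through model_paths in main.py. A minimal smoke-test sketch (hypothetical, not part of this commit) using FastAPI's TestClient; if the adapter directory is missing, the handler falls back to the "Invalid or missing model selected." message instead of loading a model, so the test still renders a page.

# Hypothetical smoke test; assumes it runs from the repo root so the
# "templates" and "static" directories exist when main.py is imported.
from fastapi.testclient import TestClient
from main import app

client = TestClient(app)
resp = client.post("/", data={"prompt": "Who are you?", "model_type": "adalora"})
assert resp.status_code == 200  # the form page always renders
print("Invalid or missing model selected." in resp.text
      or "response-container" in resp.text)  # fallback message or a generated result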