Sushwetabm committed · commit f59cf24 · 0 parent(s)
Deploy ML microservice to Hugging Face Space
Files changed:
- .dockerignore     +12 -0
- .gitignore         +9 -0
- Dockerfile        +32 -0
- __init__.py        +0 -0
- analyzer.py      +326 -0
- app.py            +55 -0
- main.py          +409 -0
- model.py         +124 -0
- requirements.txt  +24 -0
- setup.py         +106 -0
.dockerignore  ADDED
@@ -0,0 +1,12 @@

__pycache__
.venv
.git
*.md
*.pdf
*.pt
*.bin
*.log
.venv/
*.pyc
.DS_Store
model_cache/
.gitignore  ADDED
@@ -0,0 +1,9 @@

__pycache__/
*.pyc
venv/
.env
.venv
Lib
model_cache/
offload/
Dockerfile  ADDED
@@ -0,0 +1,32 @@

# Use official slim image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Set environment variables early to ensure cache use in setup.py
ENV TRANSFORMERS_CACHE=/app/model_cache \
    HF_HOME=/app/model_cache \
    TORCH_HOME=/app/model_cache \
    TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=4

# Install only necessary OS packages and clean cache
RUN apt-get update && apt-get install -y git \
    && rm -rf /var/lib/apt/lists/*

# Copy files (excluding model_cache, logs, etc. via .dockerignore)
COPY . .

# Upgrade pip + install deps without cache
RUN pip install --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Run setup.py to download the model
RUN python setup.py

# Expose Hugging Face Space-required port
EXPOSE 7860

# Launch FastAPI on port 7860 for HF Space
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
__init__.py  ADDED

File without changes (empty file)
analyzer.py  ADDED
@@ -0,0 +1,326 @@

# import json

# def analyze_code(language, code, tokenizer, model):
#     messages = [
#         {
#             "role": "system",
#             "content": (
#                 "You are a helpful and expert-level AI code reviewer and bug fixer. "
#                 "Your task is to analyze the given buggy code in the specified programming language, "
#                 "identify bugs (logical, syntax, runtime, etc.), and fix them. "
#                 "Return a JSON object with the following keys:\n\n"
#                 "1. 'bug_analysis': a list of objects, each containing:\n"
#                 "   - 'line_number': the line number (approximate if needed)\n"
#                 "   - 'error_message': a short name of the bug\n"
#                 "   - 'explanation': short explanation of the problem\n"
#                 "   - 'fix_suggestion': how to fix it\n"
#                 "2. 'corrected_code': the entire corrected code block.\n\n"
#                 "Respond with ONLY the raw JSON object, no extra commentary or markdown."
#             )
#         },
#         {
#             "role": "user",
#             "content": f"Language: {language}\nBuggy Code:\n```{language.lower()}\n{code.strip()}\n```"
#         }
#     ]

#     inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
#     attention_mask = (inputs != tokenizer.pad_token_id).long()

#     outputs = model.generate(
#         inputs,
#         attention_mask=attention_mask,
#         max_new_tokens=1024,
#         do_sample=False,
#         pad_token_id=tokenizer.eos_token_id,
#         eos_token_id=tokenizer.eos_token_id
#     )

#     response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

#     # Try parsing response to JSON
#     try:
#         json_output = json.loads(response)
#         return json_output
#     except json.JSONDecodeError:
#         print("Could not decode response into JSON. Here's the raw output:\n")
#         print(response)
#         return None

# import json
# import logging
# import time
# import torch

# # Configure logging
# logger = logging.getLogger(__name__)

# def analyze_code(language, code, tokenizer, model):
#     """
#     Analyze code and return bug analysis with improved logging and error handling
#     """
#     start_time = time.time()
#     logger.info(f"Starting analysis for {language} code ({len(code)} characters)")

#     try:
#         # Prepare messages
#         messages = [
#             {
#                 "role": "system",
#                 "content": (
#                     "You are a helpful and expert-level AI code reviewer and bug fixer. "
#                     "Your task is to analyze the given buggy code in the specified programming language, "
#                     "identify bugs (logical, syntax, runtime, etc.), and fix them. "
#                     "Return a JSON object with the following keys:\n\n"
#                     "1. 'bug_analysis': a list of objects, each containing:\n"
#                     "   - 'line_number': the line number (approximate if needed)\n"
#                     "   - 'error_message': a short name of the bug\n"
#                     "   - 'explanation': short explanation of the problem\n"
#                     "   - 'fix_suggestion': how to fix it\n"
#                     "2. 'corrected_code': the entire corrected code block.\n\n"
#                     "Respond with ONLY the raw JSON object, no extra commentary or markdown."
#                 )
#             },
#             {
#                 "role": "user",
#                 "content": f"Language: {language}\nBuggy Code:\n```{language.lower()}\n{code.strip()}\n```"
#             }
#         ]

#         logger.info("Applying chat template...")
#         inputs = tokenizer.apply_chat_template(
#             messages,
#             add_generation_prompt=True,
#             return_tensors="pt"
#         ).to(model.device)

#         attention_mask = (inputs != tokenizer.pad_token_id).long()

#         logger.info(f"Input length: {inputs.shape[1]} tokens")
#         logger.info("Starting model generation...")

#         generation_start = time.time()

#         # Generate with more conservative settings
#         with torch.no_grad():  # Ensure no gradients are computed
#             outputs = model.generate(
#                 inputs,
#                 attention_mask=attention_mask,
#                 max_new_tokens=512,  # Reduced from 1024 for faster inference
#                 do_sample=False,
#                 temperature=0.1,  # Add temperature for more consistent output
#                 pad_token_id=tokenizer.eos_token_id,
#                 eos_token_id=tokenizer.eos_token_id,
#                 use_cache=True,  # Enable KV cache for efficiency
#             )

#         generation_time = time.time() - generation_start
#         logger.info(f"Generation completed in {generation_time:.2f} seconds")

#         logger.info("Decoding response...")
#         response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

#         logger.info(f"Response length: {len(response)} characters")
#         logger.info(f"First 100 chars: {response[:100]}...")

#         # Try parsing response to JSON
#         logger.info("Attempting to parse JSON...")
#         try:
#             # Clean up response - remove any markdown formatting
#             cleaned_response = response.strip()
#             if cleaned_response.startswith('```json'):
#                 cleaned_response = cleaned_response[7:]
#             if cleaned_response.startswith('```'):
#                 cleaned_response = cleaned_response[3:]
#             if cleaned_response.endswith('```'):
#                 cleaned_response = cleaned_response[:-3]

#             cleaned_response = cleaned_response.strip()

#             json_output = json.loads(cleaned_response)

#             total_time = time.time() - start_time
#             logger.info(f"Analysis completed successfully in {total_time:.2f} seconds")

#             # Validate the JSON structure
#             if not isinstance(json_output, dict):
#                 raise ValueError("Response is not a dictionary")

#             if 'bug_analysis' not in json_output:
#                 logger.warning("Missing 'bug_analysis' key, adding empty list")
#                 json_output['bug_analysis'] = []

#             if 'corrected_code' not in json_output:
#                 logger.warning("Missing 'corrected_code' key, adding original code")
#                 json_output['corrected_code'] = code

#             return json_output

#         except json.JSONDecodeError as e:
#             logger.error(f"JSON decode error: {e}")
#             logger.error(f"Raw response: {repr(response)}")

#             # Return a fallback structure with the raw response
#             fallback_response = {
#                 "bug_analysis": [{
#                     "line_number": 1,
#                     "error_message": "Analysis parsing failed",
#                     "explanation": "The AI model returned a response that couldn't be parsed as JSON",
#                     "fix_suggestion": "Please try again or check the code format"
#                 }],
#                 "corrected_code": code,
#                 "raw_output": response,
#                 "parsing_error": str(e)
#             }

#             return fallback_response

#     except Exception as e:
#         total_time = time.time() - start_time
#         logger.error(f"Analysis failed after {total_time:.2f} seconds: {str(e)}")
#         logger.error(f"Exception type: {type(e).__name__}")

#         # Return error response
#         return {
#             "bug_analysis": [{
#                 "line_number": 1,
#                 "error_message": "Analysis failed",
#                 "explanation": f"An error occurred during analysis: {str(e)}",
#                 "fix_suggestion": "Please try again or contact support"
#             }],
#             "corrected_code": code,
#             "error": str(e),
#             "error_type": type(e).__name__
#         }

# analyzer.py

import torch
import json
import time
import logging

# Configure logger
logger = logging.getLogger("CodeAnalyzer")
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] - %(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)


def analyze_code(tokenizer, model, language, code):
    start_time = time.time()

    messages = [
        {
            "role": "system",
            "content": (
                "You are a helpful and expert-level AI code reviewer and bug fixer. "
                "Your task is to analyze the given buggy code in the specified programming language, "
                "identify bugs (logical, syntax, runtime, etc.), and fix them. "
                "Return a JSON object with the following keys:\n\n"
                "1. 'bug_analysis': a list of objects, each containing:\n"
                "   - 'line_number': the line number (approximate if needed)\n"
                "   - 'error_message': a short name of the bug\n"
                "   - 'explanation': short explanation of the problem\n"
                "   - 'fix_suggestion': how to fix it\n"
                "2. 'corrected_code': the entire corrected code block.\n\n"
                "Respond only with a JSON block, no extra commentary."
            )
        },
        {
            "role": "user",
            "content": f"Language: {language}\nBuggy Code:\n```{language.lower()}\n{code.strip()}\n```"
        }
    ]

    try:
        logger.info("Tokenizing input...")
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            return_tensors="pt"
        ).to(model.device)

        attention_mask = (inputs != tokenizer.pad_token_id).long()

        logger.info("Starting generation...")
        generation_start = time.time()
        outputs = model.generate(
            inputs,
            attention_mask=attention_mask,
            max_new_tokens=1024,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id
        )
        generation_time = time.time() - generation_start
        logger.info(f"Generation completed in {generation_time:.2f} seconds")

        logger.info("Decoding response...")
        response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)

        logger.info(f"Response length: {len(response)} characters")
        logger.info(f"First 100 chars: {response[:100]}...")

        # Attempt to parse as JSON
        logger.info("Attempting to parse JSON...")
        cleaned_response = response.strip()
        if cleaned_response.startswith('```json'):
            cleaned_response = cleaned_response[7:]
        elif cleaned_response.startswith('```'):
            cleaned_response = cleaned_response[3:]
        if cleaned_response.endswith('```'):
            cleaned_response = cleaned_response[:-3]

        cleaned_response = cleaned_response.strip()

        json_output = json.loads(cleaned_response)

        total_time = time.time() - start_time
        logger.info(f"Analysis completed successfully in {total_time:.2f} seconds")

        # Validate and patch missing keys
        if not isinstance(json_output, dict):
            raise ValueError("Parsed response is not a dictionary")

        if 'bug_analysis' not in json_output:
            logger.warning("Missing 'bug_analysis' key, adding empty list")
            json_output['bug_analysis'] = []

        if 'corrected_code' not in json_output:
            logger.warning("Missing 'corrected_code' key, adding original code")
            json_output['corrected_code'] = code

        return json_output

    except json.JSONDecodeError as e:
        logger.error(f"JSON decode error: {e}")
        logger.error(f"Raw response: {repr(response)}")
        return {
            "bug_analysis": [{
                "line_number": 1,
                "error_message": "Analysis parsing failed",
                "explanation": "The AI model returned a response that couldn't be parsed as JSON",
                "fix_suggestion": "Please try again or check the code format"
            }],
            "corrected_code": code,
            "raw_output": response,
            "parsing_error": str(e)
        }

    except Exception as e:
        total_time = time.time() - start_time
        logger.error(f"Analysis failed after {total_time:.2f} seconds: {str(e)}")
        logger.error(f"Exception type: {type(e).__name__}")
        return {
            "bug_analysis": [{
                "line_number": 1,
                "error_message": "Analysis failed",
                "explanation": f"An error occurred during analysis: {str(e)}",
                "fix_suggestion": "Please try again or contact support"
            }],
            "corrected_code": code,
            "error": str(e),
            "error_type": type(e).__name__
        }
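For reference, a minimal sketch of exercising the analyzer outside the API, following the active signature analyze_code(tokenizer, model, language, code); the buggy sample is hypothetical and the sketch itself is not part of the commit:

# smoke_test_sketch.py - illustrative local smoke test for analyze_code
from model import load_model_sync
from analyzer import analyze_code
import json

tokenizer, model = load_model_sync()

buggy = "def add(a, b):\n    return a - b"  # hypothetical buggy input
result = analyze_code(tokenizer, model, "Python", buggy)

# result is always a dict: the parsed model output on success, or the
# fallback structure carrying 'parsing_error' / 'error' keys on failure.
print(json.dumps(result, indent=2))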
app.py  ADDED
@@ -0,0 +1,55 @@

# # app.py

# from model import load_model
# from analyzer import analyze_code
# import json

# if __name__ == "__main__":
#     print("AI Bug Explainer - Local Terminal Interface")
#     language = input("Enter programming language (e.g., Python): ")
#     print("\nPaste your buggy code. End input with a line that says only 'END':\n")

#     lines = []
#     while True:
#         line = input()
#         if line.strip() == "END":
#             break
#         lines.append(line)

#     code = "\n".join(lines)

#     tokenizer, model = load_model()
#     print("\nAnalyzing your code...\n")
#     result = analyze_code(language, code, tokenizer, model)

#     print(json.dumps(result, indent=2))
# app.py

# model.py exposes load_model_sync (there is no load_model), so alias it here
from model import load_model_sync as load_model
from analyzer import analyze_code
import json

def main():
    print("Loading model...")
    tokenizer, model = load_model()

    print("\nEnter your code for analysis.")
    language = input("Programming Language (e.g., Python, JavaScript): ").strip()

    print("Paste your buggy code (end input with an empty line):")
    code_lines = []
    while True:
        line = input()
        if line == "":
            break
        code_lines.append(line)
    code = "\n".join(code_lines)

    print("\nAnalyzing your code...\n")
    result = analyze_code(tokenizer, model, language, code)

    print("\nJSON Response:")
    print(json.dumps(result, indent=2))

if __name__ == "__main__":
    main()
main.py  ADDED
@@ -0,0 +1,409 @@

# from fastapi import FastAPI, HTTPException
# from fastapi.middleware.cors import CORSMiddleware
# from pydantic import BaseModel
# from model import load_model
# from analyzer import analyze_code
# import logging

# app = FastAPI(
#     title="AI Bug Explainer",
#     description="An AI service that detects and fixes bugs in code",
#     version="1.0.0"
# )

# # CORS setup
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],  # Replace with your frontend URL in prod
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )

# # Logging setup
# logging.basicConfig(level=logging.INFO)

# class AnalyzeRequest(BaseModel):
#     language: str
#     code: str

# @app.post("/analyze")
# async def analyze(req: AnalyzeRequest):
#     logging.info(f"Received code for analysis ({req.language})")

#     result = analyze_code(req.language, req.code, tokenizer, model)

#     if result is None:
#         raise HTTPException(status_code=500, detail="Model failed to return any response.")

#     if not isinstance(result, dict):
#         logging.warning("Model did not return valid JSON, sending raw output")
#         return {
#             "bugs": [],
#             "corrected_code": "",
#             "raw_output": result
#         }

#     return {
#         "bugs": result.get("bug_analysis", []),
#         "corrected_code": result.get("corrected_code", ""),
#         "raw_output": ""  # So frontend doesn't break
#     }

# # Load model
# print("Loading model...")
# tokenizer, model = load_model()
# print("Model loaded!")

# from fastapi import FastAPI, HTTPException
# from fastapi.middleware.cors import CORSMiddleware
# from pydantic import BaseModel
# from model import load_model
# from analyzer import analyze_code
# import logging

# app = FastAPI(
#     title="AI Bug Explainer ML Microservice",
#     description="An AI service that detects and fixes bugs in code",
#     version="1.0.0"
# )

# # CORS setup
# app.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],  # Replace with your frontend URL in prod
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )

# # Logging setup
# logging.basicConfig(level=logging.INFO)

# class AnalyzeRequest(BaseModel):
#     language: str
#     code: str

# # Transform bug analysis to match frontend expectations
# def transform_bug_to_issue(bug):
#     """Transform ML service bug format to frontend issue format"""
#     return {
#         "lineNumber": bug.get("line_number", 0),
#         "type": bug.get("error_message", "Unknown Error"),
#         "message": bug.get("explanation", "No explanation provided"),
#         "suggestion": bug.get("fix_suggestion", "No suggestion provided")
#     }

# # Keep your original endpoint for backward compatibility
# @app.post("/analyze")
# async def analyze(req: AnalyzeRequest):
#     logging.info(f"Received code for analysis ({req.language})")

#     result = analyze_code(req.language, req.code, tokenizer, model)

#     if result is None:
#         raise HTTPException(status_code=500, detail="Model failed to return any response.")

#     if not isinstance(result, dict):
#         logging.warning("Model did not return valid JSON, sending raw output")
#         return {
#             "bugs": [],
#             "corrected_code": "",
#             "raw_output": result
#         }

#     return {
#         "bugs": result.get("bug_analysis", []),
#         "corrected_code": result.get("corrected_code", ""),
#         "raw_output": ""  # So frontend doesn't break
#     }

# # NEW: Add frontend-compatible endpoint
# @app.post("/analysis/submit")
# async def analyze_for_frontend(req: AnalyzeRequest):
#     logging.info(f"Frontend: Received code for analysis ({req.language})")

#     result = analyze_code(req.language, req.code, tokenizer, model)

#     if result is None:
#         raise HTTPException(status_code=500, detail="Model failed to return any response.")

#     # If result is not valid JSON, return raw output as fallback
#     if not isinstance(result, dict):
#         logging.warning("Model did not return valid JSON, showing raw output")
#         return {
#             "success": False,
#             "has_json_output": False,
#             "corrected_code": "",
#             "issues": [],
#             "raw_output": str(result)
#         }

#     # Successfully parsed JSON
#     bugs = result.get("bug_analysis", [])
#     issues = [transform_bug_to_issue(bug) for bug in bugs]
#     corrected_code = result.get("corrected_code", "")

#     return {
#         "success": True,
#         "has_json_output": True,
#         "corrected_code": corrected_code,
#         "issues": issues,
#         "raw_output": ""
#     }

# # Add history endpoint (placeholder for now)
# @app.get("/analysis/history")
# async def get_analysis_history():
#     # TODO: Implement database storage for history
#     # For now, return empty array to match frontend expectations
#     return {"data": []}

# # Health check endpoint
# @app.get("/health")
# async def health_check():
#     return {
#         "status": "healthy",
#         "model_loaded": tokenizer is not None and model is not None
#     }

# # Load model
# print("Loading model...")
# tokenizer, model = load_model()
# print("Model loaded!")

# if __name__ == "__main__":
#     import uvicorn
#     uvicorn.run(app, host="0.0.0.0", port=8000)

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from model import load_model_async, get_model, is_model_loaded, get_model_info
from analyzer import analyze_code
import logging
import asyncio
import time
from dotenv import load_dotenv
load_dotenv()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

app = FastAPI(
    title="AI Bug Explainer ML Microservice",
    description="An AI service that detects and fixes bugs in code",
    version="1.0.0"
)

# CORS setup
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Replace with your frontend URL in prod
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class AnalyzeRequest(BaseModel):
    language: str
    code: str

# Global variables for model loading status
model_load_start_time = None
model_load_task = None

def transform_bug_to_issue(bug):
    """Transform ML service bug format to frontend issue format"""
    return {
        "lineNumber": bug.get("line_number", 0),
        "type": bug.get("error_message", "Unknown Error"),
        "message": bug.get("explanation", "No explanation provided"),
        "suggestion": bug.get("fix_suggestion", "No suggestion provided")
    }

@app.on_event("startup")
async def startup_event():
    """Start model loading in background when server starts"""
    global model_load_start_time, model_load_task
    logger.info("Starting ML microservice...")
    logger.info("Initiating background model loading...")

    model_load_start_time = time.time()

    # Start model loading in background
    model_load_task = asyncio.create_task(load_model_async())

    logger.info("Server started! Model is loading in background...")

@app.get("/health")
async def health_check():
    """Enhanced health check with model loading status"""
    global model_load_start_time

    model_info = get_model_info()
    loading_time = None

    if model_load_start_time:
        loading_time = round(time.time() - model_load_start_time, 2)

    return {
        "status": "healthy",
        "model_info": model_info,
        "loading_time_seconds": loading_time,
        "ready_for_inference": model_info["loaded"]
    }

@app.get("/model/status")
async def model_status():
    """Get detailed model loading status"""
    global model_load_start_time

    model_info = get_model_info()
    loading_time = None

    if model_load_start_time:
        loading_time = round(time.time() - model_load_start_time, 2)

    return {
        "model_id": model_info["model_id"],
        "loaded": model_info["loaded"],
        "loading": model_info["loading"],
        "loading_time_seconds": loading_time,
        "ready": model_info["loaded"]
    }

@app.post("/analyze")
async def analyze(req: AnalyzeRequest):
    """Original analyze endpoint with model loading check"""
    logger.info(f"Received code for analysis ({req.language})")

    # Check if model is loaded
    if not is_model_loaded():
        # Wait for model to load (with timeout)
        try:
            await asyncio.wait_for(model_load_task, timeout=300)  # 5 minute timeout
        except asyncio.TimeoutError:
            raise HTTPException(
                status_code=503,
                detail="Model is still loading. Please try again in a few moments."
            )

    try:
        tokenizer, model = get_model()
        # Argument order matches the active signature in analyzer.py:
        # analyze_code(tokenizer, model, language, code)
        result = analyze_code(tokenizer, model, req.language, req.code)

        if result is None:
            raise HTTPException(status_code=500, detail="Model failed to return any response.")

        if not isinstance(result, dict):
            logger.warning("Model did not return valid JSON, sending raw output")
            return {
                "bugs": [],
                "corrected_code": "",
                "raw_output": result
            }

        return {
            "bugs": result.get("bug_analysis", []),
            "corrected_code": result.get("corrected_code", ""),
            "raw_output": ""
        }
    except Exception as e:
        logger.error(f"Analysis error: {e}")
        raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")

@app.post("/analysis/submit")
async def analyze_for_frontend(req: AnalyzeRequest):
    """Frontend-compatible endpoint with model loading check"""
    logger.info(f"Frontend: Received code for analysis ({req.language})")

    # Check if model is loaded
    if not is_model_loaded():
        # If model is still loading, return appropriate response
        if model_load_task and not model_load_task.done():
            return {
                "success": False,
                "has_json_output": False,
                "corrected_code": "",
                "issues": [],
                "raw_output": "Model is still loading. Please wait a moment and try again.",
                "model_status": "loading"
            }
        else:
            # Try to wait for model loading
            try:
                await asyncio.wait_for(model_load_task, timeout=30)  # Short timeout for frontend
            except (asyncio.TimeoutError, Exception):
                return {
                    "success": False,
                    "has_json_output": False,
                    "corrected_code": "",
                    "issues": [],
                    "raw_output": "Model is not ready yet. Please try again in a few moments.",
                    "model_status": "loading"
                }

    try:
        tokenizer, model = get_model()
        # Same corrected argument order as /analyze above
        result = analyze_code(tokenizer, model, req.language, req.code)

        if result is None:
            return {
                "success": False,
                "has_json_output": False,
                "corrected_code": "",
                "issues": [],
                "raw_output": "Model failed to return any response.",
                "model_status": "error"
            }

        # If result is not valid JSON, return raw output as fallback
        if not isinstance(result, dict):
            logger.warning("Model did not return valid JSON, showing raw output")
            return {
                "success": False,
                "has_json_output": False,
                "corrected_code": "",
                "issues": [],
                "raw_output": str(result),
                "model_status": "loaded"
            }

        # Successfully parsed JSON
        bugs = result.get("bug_analysis", [])
        issues = [transform_bug_to_issue(bug) for bug in bugs]
        corrected_code = result.get("corrected_code", "")

        return {
            "success": True,
            "has_json_output": True,
            "corrected_code": corrected_code,
            "issues": issues,
            "raw_output": "",
            "model_status": "loaded"
        }

    except Exception as e:
        logger.error(f"Frontend analysis error: {e}")
        return {
            "success": False,
            "has_json_output": False,
            "corrected_code": "",
            "issues": [],
            "raw_output": f"Analysis failed: {str(e)}",
            "model_status": "error"
        }

@app.get("/analysis/history")
async def get_analysis_history():
    """Get analysis history (placeholder)"""
    return {"data": []}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
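A sketch of a client round-trip against the running service; it assumes the requests package is installed and the service runs locally on the default port 8000 (on a deployed Space the base URL would instead be the Space host serving port 7860):

# client_sketch.py - illustrative client for the microservice (assumes `requests`)
import requests

BASE = "http://localhost:8000"  # assumption: local run

# Poll readiness first: /health reports whether the background load finished.
health = requests.get(f"{BASE}/health").json()
print(health["ready_for_inference"])

# Submit code; while the model is loading, the endpoint answers with
# model_status="loading" instead of blocking indefinitely.
payload = {"language": "Python", "code": "def add(a, b):\n    return a - b"}
resp = requests.post(f"{BASE}/analysis/submit", json=payload).json()
print(resp["model_status"], resp.get("issues"))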
model.py  ADDED
@@ -0,0 +1,124 @@

# model.py - Optimized version
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from functools import lru_cache
import os
import asyncio
from concurrent.futures import ThreadPoolExecutor
import logging

logger = logging.getLogger(__name__)

# Global variables to store loaded model
_tokenizer = None
_model = None
_model_loading = False
_model_loaded = False

@lru_cache(maxsize=1)
def get_model_config():
    """Cache model configuration"""
    return {
        "model_id": "deepseek-ai/deepseek-coder-1.3b-instruct",
        "torch_dtype": torch.bfloat16,
        "device_map": "auto",
        "trust_remote_code": True,
        # Add these optimizations
        "low_cpu_mem_usage": True,
        "use_cache": True,
    }

def load_model_sync():
    """Synchronous model loading with optimizations"""
    global _tokenizer, _model, _model_loaded

    if _model_loaded:
        return _tokenizer, _model

    config = get_model_config()
    model_id = config["model_id"]

    logger.info(f"Loading model {model_id}...")

    try:
        # Set cache directory to avoid re-downloading
        cache_dir = os.environ.get("TRANSFORMERS_CACHE", "./model_cache")
        os.makedirs(cache_dir, exist_ok=True)

        # Load tokenizer first (faster)
        logger.info("Loading tokenizer...")
        _tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            trust_remote_code=config["trust_remote_code"],
            cache_dir=cache_dir,
            use_fast=True,  # Use fast tokenizer if available
        )

        # Load model with optimizations
        logger.info("Loading model...")
        _model = AutoModelForCausalLM.from_pretrained(
            model_id,
            trust_remote_code=config["trust_remote_code"],
            torch_dtype=config["torch_dtype"],
            device_map=config["device_map"],
            low_cpu_mem_usage=config["low_cpu_mem_usage"],
            cache_dir=cache_dir,
            offload_folder="offload",
            offload_state_dict=True
        )

        # Set to evaluation mode
        _model.eval()

        _model_loaded = True
        logger.info("Model loaded successfully!")
        return _tokenizer, _model

    except Exception as e:
        logger.error(f"Failed to load model: {e}")
        raise

async def load_model_async():
    """Asynchronous model loading"""
    global _model_loading

    if _model_loaded:
        return _tokenizer, _model

    if _model_loading:
        # Wait for ongoing loading to complete
        while _model_loading and not _model_loaded:
            await asyncio.sleep(0.1)
        return _tokenizer, _model

    _model_loading = True

    try:
        # Run model loading in thread pool to avoid blocking
        loop = asyncio.get_event_loop()
        with ThreadPoolExecutor(max_workers=1) as executor:
            tokenizer, model = await loop.run_in_executor(
                executor, load_model_sync
            )
        return tokenizer, model
    finally:
        _model_loading = False

def get_model():
    """Get the loaded model (for synchronous access)"""
    if not _model_loaded:
        return load_model_sync()
    return _tokenizer, _model

def is_model_loaded():
    """Check if model is loaded"""
    return _model_loaded

def get_model_info():
    """Get model information without loading"""
    config = get_model_config()
    return {
        "model_id": config["model_id"],
        "loaded": _model_loaded,
        "loading": _model_loading,
    }
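As a usage note, callers never touch the module globals directly; everything goes through the accessors. A minimal synchronous sketch (assuming the weights are already cached so the first call is cheap):

# accessors_sketch.py - illustrative synchronous use of model.py's accessors
from model import get_model_info, get_model, is_model_loaded

print(get_model_info())         # e.g. {'model_id': ..., 'loaded': False, 'loading': False}
tokenizer, model = get_model()  # triggers load_model_sync on first call
assert is_model_loaded()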
requirements.txt  ADDED
@@ -0,0 +1,24 @@

# torch>=2.1.0
# transformers>=4.40.0
# accelerate>=0.25.0
# bitsandbytes
# fastapi
# uvicorn
# Your original dependencies (optimized versions)
torch>=2.1.0
transformers==4.41.1
accelerate==0.30.1
bitsandbytes
fastapi
uvicorn[standard]

# Additional optimizations for faster loading
tokenizers>=0.15.0  # Fast tokenizers (auto-installed with transformers but explicit for optimization)
safetensors>=0.4.0  # Faster model loading format
huggingface-hub>=0.19.0  # Better caching and download management

# Optional performance improvements
psutil>=5.9.0  # For system monitoring
python-multipart  # For FastAPI file uploads if needed

python-dotenv
setup.py  ADDED
@@ -0,0 +1,106 @@

#!/usr/bin/env python3
"""
Quick setup script to optimize your existing ML microservice.
Run this to set up caching and pre-download the model.
"""

import os
import sys
import logging
from pathlib import Path

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def setup_cache_directory():
    """Create cache directory for models"""
    cache_dir = Path("./model_cache")
    cache_dir.mkdir(exist_ok=True)
    logger.info(f"Cache directory created: {cache_dir.absolute()}")
    return cache_dir

def set_environment_variables():
    """Set environment variables for optimization"""
    env_vars = {
        "TRANSFORMERS_CACHE": "./model_cache",
        "HF_HOME": "./model_cache",
        "TORCH_HOME": "./model_cache",
        "TOKENIZERS_PARALLELISM": "false",
        "OMP_NUM_THREADS": "4"
    }

    for key, value in env_vars.items():
        os.environ[key] = value
        logger.info(f"Set {key}={value}")

def pre_download_model():
    """Pre-download the model to cache"""
    try:
        from transformers import AutoTokenizer, AutoModelForCausalLM

        model_id = "deepseek-ai/deepseek-coder-1.3b-instruct"
        cache_dir = "./model_cache"

        logger.info(f"Pre-downloading model: {model_id}")
        logger.info("This may take a few minutes on first run...")

        # Download tokenizer
        logger.info("Downloading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(
            model_id,
            cache_dir=cache_dir,
            trust_remote_code=True
        )

        # Download model
        logger.info("Downloading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            cache_dir=cache_dir,
            trust_remote_code=True,
            torch_dtype="auto",  # Let it choose the best dtype
            low_cpu_mem_usage=True,
        )

        logger.info("Model downloaded and cached successfully!")
        logger.info(f"Model cached in: {Path(cache_dir).absolute()}")

        # Test that everything works
        logger.info("Testing model loading...")
        del model, tokenizer  # Free memory

        return True

    except Exception as e:
        logger.error(f"Failed to pre-download model: {e}")
        return False

def main():
    """Main setup function"""
    logger.info("Setting up ML Microservice Optimizations")
    logger.info("=" * 50)

    # Step 1: Setup cache directory
    setup_cache_directory()

    # Step 2: Set environment variables
    set_environment_variables()

    # Step 3: Pre-download model
    success = pre_download_model()

    if success:
        logger.info("\nSetup completed successfully!")
        logger.info("Next steps:")
        logger.info("1. Replace your main.py with the optimized version")
        logger.info("2. Replace your model.py with the optimized version")
        logger.info("3. Run: python main.py")
        logger.info("\nYour server will now start much faster!")
    else:
        logger.error("\nSetup failed!")
        logger.error("Please check your internet connection and try again.")
        sys.exit(1)

if __name__ == "__main__":
    main()
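A small sketch for verifying the pre-download afterwards; this check is hypothetical (not part of the commit) and relies only on the model_cache directory that setup.py creates:

# verify_cache_sketch.py - illustrative post-setup check that the cache was populated
from pathlib import Path
from transformers import AutoTokenizer

cache_dir = Path("./model_cache")
print(any(cache_dir.iterdir()))  # expect True after `python setup.py`

# Loading from the same cache_dir should now hit disk, not the network.
tok = AutoTokenizer.from_pretrained(
    "deepseek-ai/deepseek-coder-1.3b-instruct",
    cache_dir=str(cache_dir),
    trust_remote_code=True,
)
print(type(tok).__name__)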