Spaces: Runtime error
David Ko committed
Commit · 2567ca0
1 Parent(s): cbaf1c3

Revert from OpenAI API back to local TinyLlama model

Files changed:
- README.md +1 -1
- api.py +52 -38
- requirements.txt +1 -4
README.md
CHANGED
@@ -83,7 +83,7 @@ This project follows a phased development approach:
 - **YOLOv8**: Fast and accurate object detection
 - **DETR**: DEtection TRansformer for object detection
 - **ViT**: Vision Transformer for image classification
-- **
+- **TinyLlama**: For natural language processing and question answering about detected objects
 
 ## API Endpoints
 
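The new bullet summarizes TinyLlama's role: answering natural-language questions about whatever the vision models detect. Below is a rough, self-contained sketch of that flow; the detection list and the question are made up, and the chat-template call is a general transformers pattern rather than this repo's code (the actual api.py change below builds a raw prompt instead).

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# float16 only on GPU; float32 avoids half-precision issues on CPU
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
).to(device)

# Hypothetical detection output from the vision models (YOLOv8 / DETR / ViT)
detections = [
    {"label": "dog", "confidence": 0.92},
    {"label": "frisbee", "confidence": 0.81},
]

messages = [
    {"role": "system", "content": "You analyze object-detection results for the user."},
    {"role": "user", "content": f"Detected objects: {detections}. What is likely happening in this image?"},
]

# apply_chat_template wraps the messages in TinyLlama's chat format
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)

with torch.no_grad():
    output = model.generate(inputs, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9)

# Decode only the newly generated tokens (skip the prompt)
print(tokenizer.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True))
```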
api.py
CHANGED
@@ -161,34 +161,33 @@ except Exception as e:
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
-#
-
-
-
-# Set the OpenAI API key
-openai_api_key = os.environ.get("OPENAI_API_KEY", "")
-if not openai_api_key:
-    print("Warning: OPENAI_API_KEY environment variable not set")
-
-# Set up the OpenAI client
+# LLM model (using an open-access model instead of Llama 4 which requires authentication)
+llm_model = None
+llm_tokenizer = None
 try:
-    from
+    from transformers import AutoModelForCausalLM, AutoTokenizer
 
-    print("
-
-
-
-
-
-
+    print("Loading LLM model... This may take a moment.")
+    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Using TinyLlama as an open-access alternative
+
+    llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
+    llm_model = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16,
+        # Removing options that require accelerate package
+        # device_map="auto",
+        # load_in_8bit=True
+    ).to(device)
+    print("LLM model loaded successfully")
 except Exception as e:
-    print(f"Error
-
+    print(f"Error loading LLM model: {e}")
+    llm_model = None
+    llm_tokenizer = None
 
 def process_llm_query(vision_results, user_query):
-    """Process a query with
-    if
-        return {"error": "
+    """Process a query with the LLM model using vision results and user text"""
+    if llm_model is None or llm_tokenizer is None:
+        return {"error": "LLM model not available"}
 
     # Summarize the result data (to keep within the token limit)
     summarized_results = []
@@ -206,37 +205,52 @@ def process_llm_query(vision_results, user_query):
         summarized_results.append(summary)
 
     # Create a prompt combining vision results and user query
-
-
+    prompt = f"""You are an AI assistant analyzing image detection results.
+Here are the objects detected in the image: {json.dumps(summarized_results, indent=2)}
 
 User question: {user_query}
 
 Please provide a detailed analysis based on the detected objects and the user's question.
 """
 
-    #
+    # Tokenize and generate response
    try:
        start_time = time.time()
 
-
-
-
-
-
-
-
-
-
-
+        # Check and limit the token length
+        tokens = llm_tokenizer.encode(prompt)
+        if len(tokens) > 1500:  # keep a safety margin
+            prompt = f"""You are an AI assistant analyzing image detection results.
+The image contains {len(summarized_results)} detected objects.
+
+User question: {user_query}
+
+Please provide a general analysis based on the user's question.
+"""
+
+        inputs = llm_tokenizer(prompt, return_tensors="pt").to(device)
+        with torch.no_grad():
+            output = llm_model.generate(
+                **inputs,
+                max_new_tokens=512,
+                temperature=0.7,
+                top_p=0.9,
+                do_sample=True
+            )
+
+        response_text = llm_tokenizer.decode(output[0], skip_special_tokens=True)
+
+        # Remove the prompt from the response
+        if response_text.startswith(prompt):
+            response_text = response_text[len(prompt):].strip()
 
-        response_text = response.choices[0].message.content
        inference_time = time.time() - start_time
 
        return {
            "response": response_text,
            "performance": {
                "inference_time": round(inference_time, 3),
-                "
+                "device": "GPU" if torch.cuda.is_available() else "CPU"
            }
        }
    except Exception as e:
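For reference, a minimal usage sketch of the reverted process_llm_query path. The shape of vision_results is assumed for illustration (the summarization loop that consumes it falls outside this hunk); the return structure matches the diff above.

```python
# Hypothetical vision output; the exact structure produced by the detection
# models is not shown in this hunk, so this is only an assumed example.
vision_results = {
    "detections": [
        {"label": "person", "confidence": 0.97},
        {"label": "bicycle", "confidence": 0.88},
    ]
}

result = process_llm_query(vision_results, "How many people are in the image?")

if "error" in result:
    # Returned when the TinyLlama model or tokenizer failed to load
    print(result["error"])
else:
    print(result["response"])                         # generated answer text
    print(result["performance"]["inference_time"])    # seconds, rounded to 3 decimals
    print(result["performance"]["device"])            # "GPU" or "CPU"
```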
requirements.txt
CHANGED
@@ -19,10 +19,7 @@ fastapi>=0.100.0
 uvicorn[standard]>=0.22.0
 python-multipart>=0.0.5
 
-#
-openai>=1.0.0
-
-# Llama 4 integration (legacy)
+# Llama 4 integration
 accelerator>=0.20.0
 bitsandbytes>=0.41.0
 sentencepiece>=0.1.99