David Ko committed
Commit cbaf1c3 · Parent: 0524412

Replace Llama model with OpenAI API for question answering

Files changed (3):
  1. README.md +1 -0
  2. api.py +38 -52
  3. requirements.txt +4 -1
README.md CHANGED
@@ -83,6 +83,7 @@ This project follows a phased development approach:
  - **YOLOv8**: Fast and accurate object detection
  - **DETR**: DEtection TRansformer for object detection
  - **ViT**: Vision Transformer for image classification
+ - **OpenAI API**: For natural language processing and question answering about detected objects

  ## API Endpoints
 
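As context for the new README bullet, here is a minimal client-side sketch of how the question-answering feature might be called once the service is running. The route, port, and JSON field names are hypothetical assumptions and need to be checked against the routes documented under "API Endpoints".

```python
# Hypothetical client call; the "/query" route, port, and field names are
# illustrative assumptions, not part of this commit.
import requests

payload = {
    "vision_results": [{"label": "dog", "confidence": 0.92}],  # example detections
    "user_query": "How many animals are in the image?",
}
resp = requests.post("http://localhost:8000/query", json=payload, timeout=60)
print(resp.json())
```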
api.py CHANGED
@@ -161,33 +161,34 @@ except Exception as e:
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  print(f"Using device: {device}")

- # LLM model (using an open-access model instead of Llama 4 which requires authentication)
- llm_model = None
- llm_tokenizer = None
+ # OpenAI API setup (used instead of the Llama model)
+ import os
+ import openai
+
+ # Set the OpenAI API key
+ openai_api_key = os.environ.get("OPENAI_API_KEY", "")
+ if not openai_api_key:
+     print("Warning: OPENAI_API_KEY environment variable not set")
+
+ # Set up the OpenAI client
  try:
-     from transformers import AutoModelForCausalLM, AutoTokenizer
-
-     print("Loading LLM model... This may take a moment.")
-     model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # Using TinyLlama as an open-access alternative
+     from openai import OpenAI

-     llm_tokenizer = AutoTokenizer.from_pretrained(model_name)
-     llm_model = AutoModelForCausalLM.from_pretrained(
-         model_name,
-         torch_dtype=torch.float16,
-         # Removing options that require accelerate package
-         # device_map="auto",
-         # load_in_8bit=True
-     ).to(device)
-     print("LLM model loaded successfully")
+     print("Setting up OpenAI client...")
+     if openai_api_key:
+         openai_client = OpenAI(api_key=openai_api_key)
+         print("OpenAI client initialized successfully")
+     else:
+         openai_client = None
+         print("OpenAI client not initialized due to missing API key")
  except Exception as e:
-     print(f"Error loading LLM model: {e}")
-     llm_model = None
-     llm_tokenizer = None
+     print(f"Error setting up OpenAI client: {e}")
+     openai_client = None

  def process_llm_query(vision_results, user_query):
-     """Process a query with the LLM model using vision results and user text"""
-     if llm_model is None or llm_tokenizer is None:
-         return {"error": "LLM model not available"}
+     """Process a query with the OpenAI API using vision results and user text"""
+     if openai_client is None:
+         return {"error": "OpenAI API not available. Please set OPENAI_API_KEY environment variable."}

      # Summarize the result data (to keep within token limits)
      summarized_results = []
@@ -205,52 +206,37 @@ def process_llm_query(vision_results, user_query):
          summarized_results.append(summary)

      # Create a prompt combining vision results and user query
-     prompt = f"""You are an AI assistant analyzing image detection results.
-     Here are the objects detected in the image: {json.dumps(summarized_results, indent=2)}
+     system_message = "You are an AI assistant analyzing image detection results."
+     user_message = f"""Here are the objects detected in the image: {json.dumps(summarized_results, indent=2)}

      User question: {user_query}

      Please provide a detailed analysis based on the detected objects and the user's question.
      """

-     # Tokenize and generate response
+     # Call the OpenAI API
      try:
          start_time = time.time()

-         # Check the token length and cap it if needed
-         tokens = llm_tokenizer.encode(prompt)
-         if len(tokens) > 1500:  # safety margin
-             prompt = f"""You are an AI assistant analyzing image detection results.
-             The image contains {len(summarized_results)} detected objects.
-
-             User question: {user_query}
-
-             Please provide a general analysis based on the user's question.
-             """
-
-         inputs = llm_tokenizer(prompt, return_tensors="pt").to(device)
-         with torch.no_grad():
-             output = llm_model.generate(
-                 **inputs,
-                 max_new_tokens=512,
-                 temperature=0.7,
-                 top_p=0.9,
-                 do_sample=True
-             )
-
-         response_text = llm_tokenizer.decode(output[0], skip_special_tokens=True)
-
-         # Remove the prompt from the response
-         if response_text.startswith(prompt):
-             response_text = response_text[len(prompt):].strip()
+         response = openai_client.chat.completions.create(
+             model="gpt-4",  # or another model such as "gpt-3.5-turbo"
+             messages=[
+                 {"role": "system", "content": system_message},
+                 {"role": "user", "content": user_message}
+             ],
+             max_tokens=500,
+             temperature=0.7,
+             top_p=0.9
+         )

+         response_text = response.choices[0].message.content
          inference_time = time.time() - start_time

          return {
              "response": response_text,
              "performance": {
                  "inference_time": round(inference_time, 3),
-                 "device": "GPU" if torch.cuda.is_available() else "CPU"
+                 "model": "OpenAI API"
              }
          }
      except Exception as e:
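
A minimal sketch of exercising the rewritten `process_llm_query` locally, assuming `api.py` is importable and `OPENAI_API_KEY` is exported; the detection entries are invented for illustration, since the summarization logic lies outside this hunk.

```python
# Local smoke-test sketch (assumes OPENAI_API_KEY is set and api.py is importable;
# the detection entries below are invented for illustration).
from api import process_llm_query

fake_vision_results = [
    {"label": "person", "confidence": 0.97, "bbox": [34, 50, 210, 400]},
    {"label": "bicycle", "confidence": 0.88, "bbox": [180, 220, 460, 420]},
]

result = process_llm_query(fake_vision_results, "What is the person doing?")
if "error" in result:
    print(result["error"])        # e.g. API key not configured
else:
    print(result["response"])     # model's answer about the detections
    print(result["performance"])  # {"inference_time": ..., "model": "OpenAI API"}
```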
requirements.txt CHANGED
@@ -19,7 +19,10 @@ fastapi>=0.100.0
  uvicorn[standard]>=0.22.0
  python-multipart>=0.0.5

- # Llama 4 integration
+ # OpenAI API integration (replacing Llama)
+ openai>=1.0.0
+
+ # Llama 4 integration (legacy)
  accelerator>=0.20.0
  bitsandbytes>=0.41.0
  sentencepiece>=0.1.99
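
Once the updated requirements are installed, the new OpenAI dependency can be checked with a one-off call; this is only a sketch that assumes `OPENAI_API_KEY` is exported, and the model name is just an example. If the legacy Llama stack is still needed, note that `accelerator` on PyPI is not the same package as Hugging Face's `accelerate`.

```python
# Dependency smoke test (sketch): assumes `pip install "openai>=1.0.0"` has run
# and OPENAI_API_KEY is exported; the model name is just an example.
import os
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
reply = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Reply with 'ok' if you can read this."}],
    max_tokens=5,
)
print(reply.choices[0].message.content)
```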