space-sue committed
Commit 73f8435 · Parent: 1d3dd21

fire and smoke alerts

Files changed (4):
  1. .gitignore +3 -0
  2. app.py +110 -67
  3. mcp_client.py +206 -0
  4. rtsp_server.py +42 -0
.gitignore CHANGED
@@ -1,2 +1,5 @@
  .env
  yolov8x-world.pt.eac99ff4aff54a2a95f4462dc49b3d49.partial
+ fire.mp4
+ test.mp4
+ yolov8s-world.pt
app.py CHANGED
@@ -6,7 +6,7 @@ import time
  import os
  from datetime import datetime
  from ultralytics import YOLO
- from transformers import BlipProcessor, BlipForQuestionAnswering
+ from transformers import AutoProcessor, AutoModelForVision2Seq
  import torch
  import dotenv
  dotenv.load_dotenv()
@@ -17,44 +17,72 @@ For more information on `huggingface_hub` Inference API support, please check th
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta",token=os.getenv("HUGGINGFACE_HUB_TOKEN"))

  # Load YOLO-World model
- model = YOLO('yolov8x-world.pt')
+ model = YOLO('yolov8s-world.pt')

- # Load BLIP model for VQA
- blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base", token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
- vqa_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base", token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
+ # Load SmolVLM for VQA (lighter and better)
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+ vqa_processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct", token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
+ vqa_model = AutoModelForVision2Seq.from_pretrained("HuggingFaceTB/SmolVLM-Instruct", torch_dtype=torch.float16, token=os.getenv("HUGGINGFACE_HUB_TOKEN"))
  device = "cuda" if torch.cuda.is_available() else "cpu"
  vqa_model = vqa_model.to(device)

  def analyze_fire_scene(frame):
-     # Run YOLO-World inference with custom prompts
-     results = model(frame, text=["fire", "flame", "smoke", "burning", "wildfire"])
+     """Fast fire/smoke detection with early exit"""
+     from PIL import Image
+
+     # Convert frame to PIL Image
+     image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+
+     # Priority questions - most likely to detect fire/smoke quickly
+     questions = [
+         "Is there fire or flames in this image?",
+         "Is there smoke in this image?"
+     ]

-     # Initialize detection flags and details
      fire_detected = False
      smoke_detected = False
      fire_details = []

-     # Process results
-     for result in results:
-         boxes = result.boxes
-         for box in boxes:
-             confidence = float(box.conf[0])
-             if confidence > 0.5:
-                 class_name = result.names[int(box.cls[0])]
-                 if class_name in ['fire', 'flame', 'burning', 'wildfire']:
-                     fire_detected = True
-                     # Get bounding box coordinates
-                     x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
-                     # Extract the region of interest
-                     roi = frame[int(y1):int(y2), int(x1):int(x2)]
-                     fire_details.append({
-                         'type': class_name,
-                         'confidence': confidence,
-                         'location': (x1, y1, x2, y2),
-                         'roi': roi
-                     })
-                 elif class_name == 'smoke':
-                     smoke_detected = True
+     for question in questions:
+         messages = [{
+             "role": "user",
+             "content": [
+                 {"type": "image", "image": image},
+                 {"type": "text", "text": question}
+             ]
+         }]
+
+         prompt = vqa_processor.apply_chat_template(messages, tokenize=False)
+         inputs = vqa_processor(text=prompt, images=[image], return_tensors="pt")
+         inputs = inputs.to(device)
+
+         with torch.no_grad():
+             outputs = vqa_model.generate(**inputs, max_new_tokens=20, do_sample=False)  # Shorter responses
+
+         answer = vqa_processor.decode(outputs[0], skip_special_tokens=True)
+         answer = answer.split("Assistant:")[-1].strip() if "Assistant:" in answer else answer
+         answer_lower = answer.lower()
+
+         # Check fire
+         if 'fire' in question.lower():
+             fire_keywords = ['fire', 'flame', 'burning', 'blaze', 'yes']
+             if any(word in answer_lower for word in fire_keywords):
+                 fire_detected = True
+                 fire_details.append({
+                     'type': 'fire_detected_by_vision',
+                     'confidence': 0.8,
+                     'description': answer
+                 })
+                 # Early exit if fire detected
+                 return True, smoke_detected, fire_details
+
+         # Check smoke
+         if 'smoke' in question.lower():
+             smoke_keywords = ['smoke', 'smoky', 'yes']
+             if any(word in answer_lower for word in smoke_keywords):
+                 smoke_detected = True
+                 # Early exit if smoke detected
+                 return fire_detected, True, fire_details

      return fire_detected, smoke_detected, fire_details

@@ -77,52 +105,67 @@ def get_fire_analysis(frame, fire_details):
          # out = vqa_model.generate(**inputs)
          # print(blip_processor.decode(out[0], skip_special_tokens=True))

-         # Generate answer
+         # Generate answer using SmolVLM
+         messages = [
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "image", "image": frame},
+                     {"type": "text", "text": question}
+                 ]
+             }
+         ]
+
+         prompt = vqa_processor.apply_chat_template(messages, tokenize=False)
+         inputs = vqa_processor(text=prompt, images=[frame], return_tensors="pt")
+         inputs = inputs.to(device)
+
          with torch.no_grad():
-             outputs = vqa_model.generate(
-                 **inputs,
-                 max_length=20,
-                 num_beams=3,
-                 min_length=1,
-                 top_p=0.9,
-                 repetition_penalty=1.5,
-                 length_penalty=1.0,
-                 temperature=1.0,
-             )
-         answer = blip_processor.decode(outputs[0], skip_special_tokens=True)
-         analysis.append(f"Q: {question}\nA: {answer}")
+             outputs = vqa_model.generate(**inputs, max_new_tokens=50, do_sample=False)
+
+         answer = vqa_processor.decode(outputs[0], skip_special_tokens=True)
+         answer = answer.split("Assistant:")[-1].strip() if "Assistant:" in answer else answer
+         analysis.append(f"Q: {question}\nA: {answer}")

      return analysis

- def check_for_fire():
-     # Request webcam access
-     cap = cv2.VideoCapture(0)
+ def check_for_fire(video_source=0):
+     """Real-time fire detection processing every 100th frame"""
+     cap = cv2.VideoCapture(video_source)
      if not cap.isOpened():
-         return "Error: Could not access webcam"
+         return "Error: Could not access video source"

-     # Read a frame
-     ret, frame = cap.read()
-     if not ret:
-         cap.release()
-         return "Error: Could not read from webcam"
+     frame_count = 0

-     # Detect fire and smoke
-     fire_detected, smoke_detected, fire_details = analyze_fire_scene(frame)
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         frame_count += 1
+
+         # Process every 100th frame for real-time performance
+         if frame_count % 100 == 0:
+             print(f"Analyzing frame {frame_count}...")
+
+             # Detect fire and smoke
+             fire_detected, smoke_detected, fire_details = analyze_fire_scene(frame)
+
+             if fire_detected or smoke_detected:
+                 cap.release()
+                 location = "Video Stream"
+
+                 if fire_detected:
+                     return f"🔥 FIRE DETECTED at {location}! Frame: {frame_count}"
+                 elif smoke_detected:
+                     return f"💨 SMOKE DETECTED at {location}! Frame: {frame_count}"
+
+         # Break on 'q' key (for testing)
+         if cv2.waitKey(1) & 0xFF == ord('q'):
+             break

-     # Release webcam
      cap.release()
-
-     # Get location (you might want to implement a more sophisticated location detection)
-     location = "Webcam Location"  # Replace with actual location detection
-
-     if fire_detected:
-         # Get detailed analysis of the fire
-         analysis = get_fire_analysis(frame, fire_details)
-         return f"Fire detected at {location}!\n\nAnalysis:\n" + "\n".join(analysis)
-     elif smoke_detected:
-         return f"Smoke detected at {location}!"
-     else:
-         return "No fire or smoke detected"
+     return "No fire or smoke detected in video stream"

  def respond(
      message,
@@ -134,7 +177,7 @@ def respond(
  ):
      # Check if user wants to detect fire
      if "detect fire" in message.lower():
-         return check_for_fire()
+         return check_for_fire(0)  # Use webcam

      messages = [{"role": "system", "content": system_message}]
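The net effect of the app.py changes is that detection now rests on a SmolVLM yes/no query per frame instead of YOLO-World boxes. Below is a minimal standalone sketch of that query pattern, assuming the same `HuggingFaceTB/SmolVLM-Instruct` checkpoint and an OpenCV BGR frame; the `ask_frame` helper is illustrative and not part of the commit.

```python
# Minimal sketch of the SmolVLM yes/no query behind analyze_fire_scene().
# Assumes the same checkpoint as the commit; ask_frame() is an illustrative helper.
import cv2
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq

device = "cuda" if torch.cuda.is_available() else "cpu"
processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
model = AutoModelForVision2Seq.from_pretrained(
    "HuggingFaceTB/SmolVLM-Instruct",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

def ask_frame(frame_bgr, question):
    """Ask a single question about an OpenCV BGR frame and return the text answer."""
    image = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    messages = [{"role": "user",
                 "content": [{"type": "image"},
                             {"type": "text", "text": question}]}]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=20, do_sample=False)
    answer = processor.decode(out[0], skip_special_tokens=True)
    return answer.split("Assistant:")[-1].strip()

# Example: ask_frame(frame, "Is there fire or flames in this image?") -> "Yes, ..." / "No."
```

Calling `ask_frame(frame, "Is there fire or flames in this image?")` and keyword-matching the reply is essentially what the new `analyze_fire_scene()` does, minus the early-exit bookkeeping.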
 
mcp_client.py ADDED
@@ -0,0 +1,206 @@
+ import gradio as gr
+ import cv2
+ import threading
+ import time
+ import requests
+ import json
+ from datetime import datetime
+
+ class FireDetectionClient:
+     def __init__(self):
+         self.video_sources = {}
+         self.detection_threads = {}
+         self.running = {}
+         self.mcp_server_url = "http://localhost:7860"
+
+     def detect_fire_mcp(self, frame):
+         """Send frame to MCP server for fire detection"""
+         try:
+             # Convert frame to base64 or save temporarily
+             import base64
+             import io
+             from PIL import Image
+
+             # Convert frame to PIL Image
+             image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+
+             # Convert to base64
+             buffer = io.BytesIO()
+             image.save(buffer, format='JPEG')
+             img_str = base64.b64encode(buffer.getvalue()).decode()
+
+             # Send to MCP server (assuming it has an API endpoint)
+             response = requests.post(
+                 f"{self.mcp_server_url}/detect_fire",
+                 json={"image": img_str},
+                 timeout=5
+             )
+
+             if response.status_code == 200:
+                 return response.json()
+             else:
+                 return {"error": "MCP server error"}
+
+         except Exception as e:
+             return {"error": str(e)}
+
+     def monitor_video_source(self, source_id, video_source):
+         """Monitor a video source for fire/smoke detection"""
+         cap = cv2.VideoCapture(video_source)
+         if not cap.isOpened():
+             return f"Error: Could not open video source {source_id}"
+
+         frame_count = 0
+         self.running[source_id] = True
+
+         while self.running.get(source_id, False):
+             ret, frame = cap.read()
+             if not ret:
+                 break
+
+             frame_count += 1
+
+             # Process every 100th frame
+             if frame_count % 100 == 0:
+                 timestamp = datetime.now().strftime("%H:%M:%S")
+                 print(f"[{timestamp}] Source {source_id}: Analyzing frame {frame_count}")
+
+                 # Simple fire detection (replace with MCP call)
+                 fire_detected, smoke_detected = self.simple_fire_detection(frame)
+
+                 if fire_detected or smoke_detected:
+                     alert = f"🚨 ALERT - Source {source_id} at {timestamp}:\n"
+                     if fire_detected:
+                         alert += "🔥 FIRE DETECTED!\n"
+                     if smoke_detected:
+                         alert += "💨 SMOKE DETECTED!\n"
+                     alert += f"Frame: {frame_count}"
+
+                     print(alert)
+                     # Here you could send notifications, save alerts, etc.
+
+             time.sleep(0.01)  # Small delay to prevent CPU overload
+
+         cap.release()
+         print(f"Stopped monitoring source {source_id}")
+
+     def simple_fire_detection(self, frame):
+         """Simple color-based fire detection as fallback"""
+         # Convert to HSV for better color detection
+         hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
+
+         # Fire color ranges (orange/red/yellow)
+         fire_lower1 = (0, 50, 50)
+         fire_upper1 = (10, 255, 255)
+         fire_lower2 = (170, 50, 50)
+         fire_upper2 = (180, 255, 255)
+
+         # Create masks
+         mask1 = cv2.inRange(hsv, fire_lower1, fire_upper1)
+         mask2 = cv2.inRange(hsv, fire_lower2, fire_upper2)
+         fire_mask = cv2.bitwise_or(mask1, mask2)
+
+         # Smoke detection (gray areas)
+         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+         smoke_mask = cv2.inRange(gray, 100, 200)
+
+         # Check if significant area detected
+         fire_area = cv2.countNonZero(fire_mask)
+         smoke_area = cv2.countNonZero(smoke_mask)
+
+         fire_detected = fire_area > 1000  # Threshold for fire
+         smoke_detected = smoke_area > 5000  # Threshold for smoke
+
+         return fire_detected, smoke_detected
+
+     def start_monitoring(self, sources):
+         """Start monitoring selected video sources"""
+         results = []
+
+         for i, source in enumerate(sources):
+             if source.strip():
+                 source_id = f"Source_{i+1}"
+
+                 # Convert source to appropriate format
+                 if source.isdigit():
+                     video_source = int(source)
+                 else:
+                     video_source = source
+
+                 # Start monitoring thread
+                 thread = threading.Thread(
+                     target=self.monitor_video_source,
+                     args=(source_id, video_source),
+                     daemon=True
+                 )
+
+                 self.detection_threads[source_id] = thread
+                 thread.start()
+
+                 results.append(f"✅ Started monitoring {source_id}: {source}")
+
+         return "\n".join(results) if results else "No valid sources provided"
+
+     def stop_monitoring(self):
+         """Stop all monitoring threads"""
+         for source_id in self.running:
+             self.running[source_id] = False
+
+         return "🛑 Stopped all monitoring"
+
+ # Initialize client
+ client = FireDetectionClient()
+
+ def create_interface():
+     """Create Gradio interface for fire detection client"""
+
+     with gr.Blocks(title="Fire Detection Client") as interface:
+         gr.Markdown("# 🔥 Fire Detection Client")
+         gr.Markdown("Monitor up to 4 video sources for fire and smoke detection")
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### Video Sources")
+                 source1 = gr.Textbox(label="Source 1 (webcam: 0, file path, or RTSP URL)", placeholder="0")
+                 source2 = gr.Textbox(label="Source 2", placeholder="rtsp://localhost:8554/stream")
+                 source3 = gr.Textbox(label="Source 3", placeholder="C:/path/to/video.mp4")
+                 source4 = gr.Textbox(label="Source 4", placeholder="")
+
+                 with gr.Row():
+                     start_btn = gr.Button("🚀 Start Monitoring", variant="primary")
+                     stop_btn = gr.Button("🛑 Stop Monitoring", variant="secondary")
+
+             with gr.Column():
+                 gr.Markdown("### Status")
+                 status_output = gr.Textbox(
+                     label="Monitoring Status",
+                     lines=10,
+                     interactive=False
+                 )
+
+                 gr.Markdown("### Instructions")
+                 gr.Markdown("""
+                 - **Webcam**: Enter `0` for default webcam, `1` for second camera
+                 - **Video File**: Enter full path like `C:/videos/fire.mp4`
+                 - **RTSP Stream**: Enter URL like `rtsp://localhost:8554/stream`
+                 - **Detection**: Analyzes every 100th frame for real-time performance
+                 - **Alerts**: Check console output for fire/smoke detection alerts
+                 """)
+
+         # Event handlers
+         start_btn.click(
+             fn=lambda s1, s2, s3, s4: client.start_monitoring([s1, s2, s3, s4]),
+             inputs=[source1, source2, source3, source4],
+             outputs=status_output
+         )
+
+         stop_btn.click(
+             fn=client.stop_monitoring,
+             outputs=status_output
+         )
+
+     return interface
+
+ if __name__ == "__main__":
+     interface = create_interface()
+     interface.launch(server_port=7861, share=False)
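`detect_fire_mcp()` posts a base64-encoded JPEG to `{mcp_server_url}/detect_fire`, but this commit does not add such a route anywhere. Below is a hypothetical sketch of a matching endpoint, assuming FastAPI and reusing the client's own color-threshold fallback; every server-side name here is an assumption rather than part of the repository.

```python
# Hypothetical /detect_fire route matching detect_fire_mcp() in mcp_client.py.
# Not part of this commit; FastAPI and all server-side names are assumptions.
import base64
import io

import cv2
import numpy as np
from fastapi import FastAPI
from PIL import Image
from pydantic import BaseModel

from mcp_client import FireDetectionClient  # reuse the color-threshold fallback

app = FastAPI()
detector = FireDetectionClient()

class FramePayload(BaseModel):
    image: str  # base64-encoded JPEG, as sent by FireDetectionClient.detect_fire_mcp

@app.post("/detect_fire")
def detect_fire(payload: FramePayload):
    # Decode the base64 JPEG back into a BGR frame for OpenCV
    pil_image = Image.open(io.BytesIO(base64.b64decode(payload.image)))
    frame = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    fire, smoke = detector.simple_fire_detection(frame)
    return {"fire_detected": bool(fire), "smoke_detected": bool(smoke)}
```

Served with uvicorn on port 7860 (where `mcp_server_url` already points), this would let the existing `detect_fire_mcp()` call round-trip; the same logic could instead be mounted on the Gradio app in app.py.
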
rtsp_server.py ADDED
@@ -0,0 +1,42 @@
+ import cv2
+ import subprocess
+ import threading
+ import time
+
+ def stream_mp4_as_rtsp(mp4_file, rtsp_port=8554):
+     """Stream MP4 file as RTSP in a loop"""
+     rtsp_url = f"rtsp://localhost:{rtsp_port}/stream"
+
+     # FFmpeg command to stream MP4 as RTSP
+     cmd = [
+         'ffmpeg',
+         '-re',                 # Read input at native frame rate
+         '-stream_loop', '-1',  # Loop infinitely
+         '-i', mp4_file,
+         '-c', 'copy',          # Copy without re-encoding
+         '-f', 'rtsp',
+         rtsp_url
+     ]
+
+     print(f"Starting RTSP server...")
+     print(f"RTSP URL: {rtsp_url}")
+
+     try:
+         process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+         return process, rtsp_url
+     except Exception as e:
+         print(f"Error starting RTSP server: {e}")
+         return None, None
+
+ if __name__ == "__main__":
+     mp4_file = input("Enter MP4 file path: ")
+     process, url = stream_mp4_as_rtsp(mp4_file)
+
+     if process:
+         print(f"RTSP stream running at: {url}")
+         print("Press Ctrl+C to stop")
+         try:
+             process.wait()
+         except KeyboardInterrupt:
+             process.terminate()
+             print("RTSP server stopped")
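One caveat worth noting: ffmpeg's `-f rtsp` output publishes to an RTSP server rather than acting as one, so this script appears to assume something like MediaMTX is already listening on port 8554; that dependency is not part of the commit. Below is a small sketch for checking that the stream is actually reachable, opening it the same way the monitoring client would.

```python
# Quick reachability check for the stream started by rtsp_server.py.
# Assumes an RTSP server (e.g. MediaMTX) is accepting publishes on port 8554.
import cv2

def probe_rtsp(url="rtsp://localhost:8554/stream", attempts=5):
    """Try to read one frame from the RTSP URL; return True on success."""
    cap = cv2.VideoCapture(url)
    got_frame = False
    for _ in range(attempts):
        ret, frame = cap.read()
        if ret:
            print(f"Got a {frame.shape[1]}x{frame.shape[0]} frame from {url}")
            got_frame = True
            break
    cap.release()
    return got_frame

if __name__ == "__main__":
    print("Stream reachable:", probe_rtsp())
```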