sksameermujahid committed on
Commit f558e11 · verified · 1 Parent(s): 4340445

Update newapp.py

Files changed (1):
  1. newapp.py +610 -612

newapp.py CHANGED
@@ -1,7 +1,7 @@
 from flask import Flask, render_template, request, jsonify
 from flask_cors import CORS
 import torch
-from transformers import pipeline, CLIPProcessor, CLIPModel, AutoModelForSequenceClassification, AutoTokenizer
 import base64
 import io
 import re
@@ -23,309 +23,68 @@ import logging
 from functools import lru_cache
 import time
 import math
 import threading
 import gc
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
-from werkzeug.serving import WSGIRequestHandler

-# Initialize Flask app
 app = Flask(__name__)
-CORS(app)

 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
         logging.StreamHandler()
     ]
 )
 logger = logging.getLogger(__name__)

-# Global variables for models with lazy loading and memory management
-models = {
-    'clip_processor': None,
-    'clip_model': None,
-    'sentence_model': None,
-    'nlp': None,
-    'geocoder': None,
-    'summarizer': None,
-    'classifier': None
-}
-
-def load_model_safely(model_name, loader_func, *args, **kwargs):
-    """Safely load a model with memory management"""
-    try:
-        # Clear memory before loading
-        gc.collect()
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-        # Remove memory-efficient settings for non-transformers models
-        if model_name in ['geocoder', 'nlp']:
-            kwargs.pop('torch_dtype', None)
-            kwargs.pop('low_cpu_mem_usage', None)
-        else:
-            # Add memory-efficient settings for transformers models
-            if 'device' in kwargs:
-                kwargs['device'] = -1  # Force CPU usage
-            if 'model' in kwargs:
-                # Use smallest available models
-                if kwargs['model'] == 'facebook/bart-large-cnn':
-                    kwargs['model'] = 'sshleifer/distilbart-cnn-6-6'  # Even smaller CNN model
-                elif kwargs['model'] == 'facebook/bart-large-mnli':
-                    kwargs['model'] = 'cross-encoder/nli-distilroberta-base'  # Smaller NLI model
-
-        model = loader_func(*args, **kwargs)
-
-        # Set model to evaluation mode and disable gradients
-        if hasattr(model, 'eval'):
-            model.eval()
-        if hasattr(model, 'requires_grad_'):
-            model.requires_grad_(False)
-
-        logger.info(f"Successfully loaded model: {model_name}")
-        return model
-    except Exception as e:
-        logger.error(f"Error loading model {model_name}: {str(e)}")
-        return None
-
-def cleanup_models():
-    """Clean up model resources"""
-    try:
-        for model_name, model in models.items():
-            if model is not None:
-                if hasattr(model, 'cpu'):
-                    model.cpu()
-                if hasattr(model, 'to'):
-                    model.to('cpu')
-                del model
-                models[model_name] = None
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-    except Exception as e:
-        logger.error(f"Error in cleanup_models: {str(e)}")
-
-@app.before_request
-def before_request():
-    """Clear memory before each request"""
-    try:
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-    except Exception as e:
-        logger.error(f"Error in before_request: {str(e)}")
-
-@app.after_request
-def after_request(response):
-    """Clean up memory after each request"""
-    try:
-        cleanup_models()
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-    except Exception as e:
-        logger.error(f"Error in after_request: {str(e)}")
-    return response
-
-def get_model(model_name):
-    """Lazy loading of models with optimized configurations"""
-    if models[model_name] is None:
-        try:
-            if model_name == 'clip_processor':
-                models[model_name] = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-            elif model_name == 'clip_model':
-                models[model_name] = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-            elif model_name == 'sentence_model':
-                models[model_name] = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-            elif model_name == 'nlp':
-                models[model_name] = spacy.load('en_core_web_sm')
-            elif model_name == 'geocoder':
-                models[model_name] = Nominatim(user_agent="indian_property_verifier", timeout=10)
-            elif model_name == 'summarizer':
-                models[model_name] = load_model(
-                    "summarization",
-                    "sshleifer/distilbart-cnn-6-6"
-                )
-            elif model_name == 'classifier':
-                models[model_name] = load_model(
-                    "zero-shot-classification",
-                    "facebook/bart-large-mnli"
-                )
-
-            # Set model to evaluation mode and disable gradients
-            if hasattr(models[model_name], 'eval'):
-                models[model_name].eval()
-            if hasattr(models[model_name], 'requires_grad_'):
-                models[model_name].requires_grad_(False)
-
-            logger.info(f"Successfully loaded model: {model_name}")
-        except Exception as e:
-            logger.error(f"Error loading model {model_name}: {str(e)}")
-            models[model_name] = None
-
-    return models[model_name]
-
-def process_batch(items, batch_size=4):
-    """Process items in batches to manage memory"""
-    for i in range(0, len(items), batch_size):
-        batch = items[i:i + batch_size]
-        yield batch
-        # Clean up after each batch
-        gc.collect()
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-def analyze_images(images, batch_size=4):
-    """Analyze images in batches"""
-    results = []
-    for batch in process_batch(images, batch_size):
-        batch_results = []
-        for img in batch:
-            try:
-                analysis = analyze_image(img)
-                batch_results.append(analysis)
-            except Exception as e:
-                logger.error(f"Error analyzing image: {str(e)}")
-                batch_results.append({'error': str(e)})
-        results.extend(batch_results)
-    return results
-
-def analyze_documents(documents, batch_size=2):
-    """Analyze documents in batches"""
-    results = []
-    for batch in process_batch(documents, batch_size):
-        batch_results = []
-        for doc in batch:
-            try:
-                analysis = analyze_pdf_content(doc)
-                batch_results.append(analysis)
-            except Exception as e:
-                logger.error(f"Error analyzing document: {str(e)}")
-                batch_results.append({'error': str(e)})
-        results.extend(batch_results)
-    return results
-
-def initialize_models():
-    """Initialize all models with proper error handling"""
-    try:
-        # Initialize geocoder
-        models['geocoder'] = Nominatim(user_agent="indian_property_verifier", timeout=10)
-        logger.info("Geocoder initialized successfully")
-    except Exception as e:
-        logger.error(f"Error initializing geocoder: {str(e)}")
-
-    try:
-        # Initialize CLIP model
-        models['clip_processor'] = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-        models['clip_model'] = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-        logger.info("CLIP model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading CLIP model: {str(e)}")
-
-    try:
-        # Initialize sentence transformer
-        models['sentence_model'] = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-        logger.info("Sentence transformer loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading sentence transformer: {str(e)}")
-
-    try:
-        # Initialize spaCy
-        models['nlp'] = spacy.load('en_core_web_sm')
-        logger.info("spaCy model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading spaCy model: {str(e)}")
-
-    try:
-        # Initialize summarizer
-        models['summarizer'] = pipeline(
-            "summarization",
-            model="sshleifer/distilbart-cnn-6-6",
-            device=-1,
-            max_length=100,
-            min_length=20
-        )
-        logger.info("Summarizer model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading summarizer model: {str(e)}")
-
-    try:
-        # Initialize classifier
-        models['classifier'] = pipeline(
-            "zero-shot-classification",
-            model="cross-encoder/nli-distilroberta-base",
-            device=-1
-        )
-        logger.info("Classifier model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading classifier model: {str(e)}")
-
-# Cache models
-@lru_cache(maxsize=10)
-def load_model(task, model_name):
-    try:
-        logger.info(f"Loading model: {model_name} for task: {task}")
-
-        # Use smaller models for CPU
-        if task == "zero-shot-classification":
-            from transformers import AutoModelForSequenceClassification, AutoTokenizer
-            # Use a smaller model for zero-shot classification
-            model = AutoModelForSequenceClassification.from_pretrained(
-                "facebook/bart-large-mnli",
-                torch_dtype=torch.float32,
-                device_map="auto",
-                low_cpu_mem_usage=True
-            )
-            tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
-            return pipeline(
-                task,
-                model=model,
-                tokenizer=tokenizer,
-                device=-1,
-                torch_dtype=torch.float32
-            )
-        elif task == "summarization":
-            # Use a smaller model for summarization
-            return pipeline(
-                task,
-                model="sshleifer/distilbart-cnn-6-6",
-                device=-1,
-                torch_dtype=torch.float32,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-        elif task == "text-classification":
-            # Use a smaller model for text classification
-            return pipeline(
-                task,
-                model="distilbert-base-uncased-finetuned-sst-2-english",
-                device=-1,
-                torch_dtype=torch.float32,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-        else:
-            # Default pipeline for other tasks with memory optimization
-            return pipeline(
-                task,
-                model=model_name,
-                device=-1,
-                torch_dtype=torch.float32,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-    except Exception as e:
-        logger.error(f"Error loading model {model_name}: {str(e)}")
-        # Try simpler configuration
-        try:
-            logger.info("Attempting simpler configuration...")
-            return pipeline(
-                task,
-                model=model_name,
-                device=-1,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-        except Exception as e2:
-            logger.error(f"Simpler configuration also failed: {str(e2)}")
-            raise
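The removed `get_model` helper gives lazy, on-demand loading but shares the `models` dict across requests without synchronization. A minimal sketch of the same idea with double-checked locking (the helper and loader names are illustrative, not part of the commit):

```python
import threading

_models: dict = {}
_lock = threading.Lock()

def get_model_lazy(name: str, loader):
    """Return a cached model, loading it at most once even under
    concurrent requests (double-checked locking)."""
    if name not in _models:
        with _lock:
            if name not in _models:  # re-check after acquiring the lock
                _models[name] = loader()
    return _models[name]

# Hypothetical usage with a transformers pipeline:
# from transformers import pipeline
# classifier = get_model_lazy(
#     "classifier",
#     lambda: pipeline("zero-shot-classification", device=-1),
# )
```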

 def make_json_serializable(obj):
     try:
@@ -365,17 +124,10 @@ def get_location():
             'message': 'Latitude and longitude are required'
         }), 400

-        if not models['geocoder']:
-            logger.error("Geocoder not initialized")
-            return jsonify({
-                'status': 'error',
-                'message': 'Service temporarily unavailable'
-            }), 503
-
         # Retry geocoding up to 3 times
         for attempt in range(3):
             try:
-                location = models['geocoder'].reverse((latitude, longitude), exactly_one=True)
                 if location:
                     address_components = location.raw.get('address', {})
                     return jsonify({
@@ -410,91 +162,80 @@ def get_location():
 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
-        # Initialize data dictionary
-        data = {
-            'property_name': '',
-            'property_type': '',
-            'status': '',
-            'description': '',
-            'address': '',
-            'city': '',
-            'state': '',
-            'country': 'India',
-            'zip': '',
-            'latitude': '',
-            'longitude': '',
-            'bedrooms': '',
-            'bathrooms': '',
-            'total_rooms': '',
-            'year_built': '',
-            'parking': '',
-            'sq_ft': '',
-            'market_value': '',
-            'amenities': '',
-            'nearby_landmarks': '',
-            'legal_details': ''
-        }
-
-        # Try to get data from JSON first
-        if request.is_json:
-            json_data = request.get_json()
-            if json_data:
-                for key in data:
-                    if key in json_data:
-                        data[key] = str(json_data[key]).strip()
-        # Then try form data
-        elif request.form:
-            for key in data:
-                if key in request.form:
-                    data[key] = request.form.get(key, '').strip()
-
-        # Check if we have at least some basic data
-        if not any(data.values()):
-            logger.warning("No data provided in request")
             return jsonify({
                 'error': 'No data provided',
                 'status': 'error'
             }), 400

-        # Initialize results with default values
-        results = {
-            'report_id': str(uuid.uuid4()),
-            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-            'summary': None,
-            'fraud_classification': None,
-            'trust_score': {'score': 0, 'reasoning': 'Insufficient data'},
-            'suggestions': [],
-            'quality_assessment': None,
-            'address_verification': None,
-            'cross_validation': None,
-            'location_analysis': None,
-            'price_analysis': None,
-            'legal_analysis': None,
-            'document_analysis': {'pdf_count': 0, 'pdf_texts': [], 'pdf_analysis': []},
-            'image_analysis': {'image_count': 0, 'image_analysis': []},
-            'specs_verification': None,
-            'market_analysis': None,
-            'images': [],
-            'missing_data': []
         }

-        # Process images in batches if available
         if 'images' in request.files:
             image_files = request.files.getlist('images')
-            results['image_analysis']['image_analysis'] = analyze_images(image_files)
-            results['image_analysis']['image_count'] = len(results['image_analysis']['image_analysis'])
-        else:
-            results['missing_data'].append("No images provided")
-
-        # Process documents in batches if available
         if 'documents' in request.files:
             pdf_files = request.files.getlist('documents')
-            results['document_analysis']['pdf_analysis'] = analyze_documents(pdf_files)
-            results['document_analysis']['pdf_count'] = len(results['document_analysis']['pdf_analysis'])
-        else:
-            results['missing_data'].append("No documents provided")

-        # Generate consolidated text from available data
         consolidated_text = f"""
         Property Name: {data['property_name']}
         Property Type: {data['property_type']}
@@ -512,92 +253,73 @@ def verify_property():
         Legal Details: {data['legal_details']}
         """

-        # Perform analysis based on available data
         try:
-            # Generate property summary if basic data is available
-            if data['property_name'] and data['property_type']:
-                summarizer = get_model('summarizer')
-                if summarizer:
-                    results['summary'] = generate_property_summary(data)
                 else:
-                    results['missing_data'].append("Summary generation unavailable")
-
-            # Perform fraud classification if enough data is available
-            if len(consolidated_text.strip()) > 50:
-                classifier = get_model('classifier')
-                if classifier:
-                    results['fraud_classification'] = classify_fraud(consolidated_text, data, classifier)
             else:
-                    results['missing_data'].append("Fraud classification unavailable")
-
-            # Generate trust score based on available data
-            if len(consolidated_text.strip()) > 50:
-                results['trust_score'] = generate_trust_score(consolidated_text, [], [])
-
-            # Generate suggestions based on available data
-            if len(consolidated_text.strip()) > 50:
-                results['suggestions'] = generate_suggestions(consolidated_text, data)
-
-            # Address verification if location data is available
-            if data['address'] and data['city'] and data['state']:
-                geocoder = get_model('geocoder')
-                if geocoder:
-                    results['address_verification'] = verify_address(data)
-                else:
-                    results['missing_data'].append("Address verification unavailable")
-
-            # Cross validation if property details are available
-            if data['bedrooms'] or data['bathrooms'] or data['sq_ft'] or data['market_value']:
-                results['cross_validation'] = perform_cross_validation(data)
-
-            # Location analysis if location data is available
-            if data['latitude'] and data['longitude']:
-                geocoder = get_model('geocoder')
-                if geocoder:
-                    results['location_analysis'] = analyze_location(data)
-                else:
-                    results['missing_data'].append("Location analysis unavailable")
-
-            # Price analysis if price data is available
-            if data['market_value']:
-                classifier = get_model('classifier')
-                if classifier:
-                    results['price_analysis'] = analyze_price(data)
-                else:
-                    results['missing_data'].append("Price analysis unavailable")
-
-            # Legal analysis if legal details are available
-            if data['legal_details']:
-                classifier = get_model('classifier')
-                if classifier:
-                    results['legal_analysis'] = analyze_legal_details(data['legal_details'])
-                else:
-                    results['missing_data'].append("Legal analysis unavailable")
-
-            # Property specs verification if specs are available
-            if data['bedrooms'] or data['bathrooms'] or data['sq_ft'] or data['market_value']:
-                results['specs_verification'] = verify_property_specs(data)
-
-            # Market analysis if price and property details are available
-            if data['market_value'] and (data['sq_ft'] or data['property_type']):
-                classifier = get_model('classifier')
-                if classifier:
-                    results['market_analysis'] = analyze_market_value(data)
-                else:
-                    results['missing_data'].append("Market analysis unavailable")
-
         except Exception as e:
-            logger.error(f"Error during analysis: {str(e)}")
-            results['error'] = f"Error during analysis: {str(e)}"

-        # Clean up after processing
-        cleanup_models()

         return jsonify(make_json_serializable(results))

     except Exception as e:
         logger.error(f"Error in verify_property: {str(e)}")
-        cleanup_models()
         return jsonify({
             'error': 'Server error occurred. Please try again later.',
             'status': 'error',
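The removed handler accepted either a JSON body or form fields and normalised every value to a stripped string; the new code (shown further down) reads form fields only. A compact sketch of the dual-source pattern being dropped (`extract_payload` is an illustrative name):

```python
from flask import request

def extract_payload(fields):
    """Merge a JSON body and form fields into one dict of stripped
    strings, preferring JSON when both are present."""
    json_body = request.get_json(silent=True) or {}  # silent=True: no 400 on bad JSON
    return {
        key: str(json_body.get(key, request.form.get(key, ''))).strip()
        for key in fields
    }
```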
@@ -618,9 +340,9 @@ def extract_pdf_text(pdf_file):

 def analyze_image(image):
     try:
-        if models['clip_processor'] and models['clip_model']:
             img_rgb = image.convert('RGB')
-            inputs = models['clip_processor'](
                 text=[
                     "real estate property interior",
                     "real estate property exterior",
@@ -632,7 +354,7 @@ def analyze_image(image):
                 return_tensors="pt",
                 padding=True
             )
-            outputs = models['clip_model'](**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1).detach().numpy()[0]
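Both sides of this hunk run the same CLIP zero-shot scoring; only where the processor and model live changes (entries in the `models` dict vs. module-level globals). For reference, the scoring step in a self-contained form:

```python
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

def clip_scores(image: Image.Image, labels: list) -> dict:
    """Probability of each free-text label for the given image."""
    inputs = processor(text=labels, images=image.convert("RGB"),
                       return_tensors="pt", padding=True)
    with torch.no_grad():                      # inference only
        logits = model(**inputs).logits_per_image
    probs = logits.softmax(dim=1)[0].tolist()  # one row per image
    return dict(zip(labels, probs))
```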
@@ -799,7 +521,7 @@ def analyze_pdf_content(document_text, property_data):

 def check_document_consistency(document_text, property_data):
     try:
-        if not models['sentence_model']:
             logger.warning("Sentence model unavailable")
             return 0.5
         property_text = ' '.join([
@@ -808,8 +530,8 @@ def check_document_consistency(document_text, property_data):
             'state', 'market_value', 'sq_ft', 'bedrooms'
         ]
         ])
-        property_embedding = models['sentence_model'].encode(property_text)
-        document_embedding = models['sentence_model'].encode(document_text[:1000])
         similarity = util.cos_sim(property_embedding, document_embedding)[0][0].item()
         return max(0.0, min(1.0, float(similarity)))
     except Exception as e:
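check_document_consistency is unchanged apart from the model lookup: it embeds the joined listing fields and the first 1000 characters of the document, then clamps their cosine similarity to [0, 1]. Standalone:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

def consistency_score(property_text: str, document_text: str) -> float:
    """Clamped cosine similarity between listing fields and document text."""
    prop_emb = model.encode(property_text)
    doc_emb = model.encode(document_text[:1000])  # cap document length
    sim = util.cos_sim(prop_emb, doc_emb)[0][0].item()
    return max(0.0, min(1.0, float(sim)))
```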
@@ -838,48 +560,75 @@ def extract_document_key_info(text):
         return {}

 def generate_property_summary(data):
-    """Generate a summary of the property listing"""
     try:
-        # Get the summarizer model
-        summarizer = get_model('summarizer')
-        if summarizer is None:
-            logger.error("Summarizer model not available")
-            return "Unable to generate summary due to model unavailability"
-
         # Create a detailed context for summary generation
-        context = f"""
-        Property Name: {data.get('property_name', 'Not specified')}
-        Property Type: {data.get('property_type', 'Not specified')}
-        Location: {data.get('address', 'Not specified')}, {data.get('city', 'Not specified')}, {data.get('state', 'Not specified')}
-        Price: {data.get('market_value', 'Not specified')}
-        Area: {data.get('sq_ft', 'Not specified')} sq.ft.
-        Bedrooms: {data.get('bedrooms', 'Not specified')}
-        Bathrooms: {data.get('bathrooms', 'Not specified')}
-        Year Built: {data.get('year_built', 'Not specified')}
-        Parking: {data.get('parking', 'Not specified')} spaces
-        Amenities: {data.get('amenities', 'Not specified')}
-        Nearby Landmarks: {data.get('nearby_landmarks', 'Not specified')}
         """
-
         # Generate initial summary
-        summary = summarizer(context, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
-
-        # Enhance the summary with key features
-        enhanced_summary = f"Property Summary: {summary}"

-        # Add key features if available
         key_features = []
         if data.get('amenities'):
             key_features.append(f"Amenities: {data['amenities']}")
-        if data.get('nearby_landmarks'):
-            key_features.append(f"Nearby: {data['nearby_landmarks']}")
         if key_features:
-            enhanced_summary += f"\nKey Features: {', '.join(key_features)}"

         return enhanced_summary
     except Exception as e:
         logger.error(f"Error generating property summary: {str(e)}")
-        return "Error generating property summary"

 def summarize_text(text):
     try:
@@ -895,8 +644,9 @@ def summarize_text(text):
         logger.error(f"Error summarizing text: {str(e)}")
         return text[:200] + "..." if len(text) > 200 else text

-def classify_fraud(text, data, classifier):
     try:
         categories = [
             "suspicious pricing pattern",
             "potentially fraudulent listing",
@@ -916,8 +666,8 @@ def classify_fraud(text, data, classifier):
         - Name: {data.get('property_name', 'Not provided')}
         - Type: {data.get('property_type', 'Not provided')}
         - Status: {data.get('property_status', 'Not provided')}
-        - Price: {data.get('market_value', 'Not provided')}
-        - Square Footage: {data.get('sq_ft', 'Not provided')}
         - Year Built: {data.get('year_built', 'Not provided')}
         - Location: {data.get('address', 'Not provided')}
         - Description: {text}
@@ -939,7 +689,7 @@ def classify_fraud(text, data, classifier):
                 high_risk.append((label, score))
             elif score > 0.5:
                 medium_risk.append((label, score))
-            else:
                 low_risk.append((label, score))

         # Calculate alert score with adjusted weights
@@ -961,12 +711,97 @@ def classify_fraud(text, data, classifier):
         else:
             alert_level = 'minimal'

         return {
             'alert_level': alert_level,
             'alert_score': alert_score,
             'high_risk': high_risk,
             'medium_risk': medium_risk,
-            'low_risk': low_risk
         }
     except Exception as e:
         logger.error(f"Error in fraud classification: {str(e)}")
@@ -975,7 +810,8 @@ def classify_fraud(text, data, classifier):
         'alert_score': 1.0,
         'high_risk': [],
         'medium_risk': [],
-        'low_risk': []
     }

 def generate_trust_score(text, image_analysis, pdf_analysis):
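In classify_fraud, each zero-shot label is bucketed by score on both sides of the diff: above 0.7 is high risk, above 0.5 medium, everything else low (the hunk above restores the `else:` that routes the remainder to `low_risk`). The bucketing in isolation:

```python
def bucket_risks(labels, scores):
    """Split zero-shot labels into high/medium/low-risk buckets
    using the 0.7 and 0.5 thresholds from classify_fraud."""
    high_risk, medium_risk, low_risk = [], [], []
    for label, score in zip(labels, scores):
        if score > 0.7:
            high_risk.append((label, score))
        elif score > 0.5:
            medium_risk.append((label, score))
        else:
            low_risk.append((label, score))
    return high_risk, medium_risk, low_risk

# bucket_risks(["urgent sale", "accurate listing"], [0.82, 0.31])
# -> ([("urgent sale", 0.82)], [], [("accurate listing", 0.31)])
```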
@@ -1094,114 +930,244 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
         logger.error(f"Error generating trust score: {str(e)}")
         return 20, "Could not assess trust."

-def generate_suggestions(consolidated_text, data):
-    """Generate property improvement suggestions based on analysis."""
     try:
-        # Get the classifier model
-        classifier = get_model('classifier')
-        if classifier is None:
-            return []
-
-        # Prepare context for suggestions
-        suggestion_context = f"""
-        Property Analysis Context:
-        {consolidated_text}
-        Property Type: {data.get('property_type', 'unknown')}
-        Location: {data.get('city', 'unknown')}, {data.get('state', 'unknown')}
-        Price: ₹{data.get('market_value', 'unknown')}
-        Area: {data.get('sq_ft', 'unknown')} sq.ft.
-        Year Built: {data.get('year_built', 'unknown')}
-        """
-
-        # Define base suggestions with weights
         base_suggestions = {
-            'Presentation': {
-                'categories': ['excellent', 'good', 'fair', 'poor'],
-                'weight': 1.0,
                 'improvements': {
-                    'fair': [
-                        'Consider professional staging',
-                        'Improve lighting in key areas',
-                        'Declutter living spaces'
                     ],
-                    'poor': [
-                        'Hire professional photographer',
-                        'Deep clean the property',
-                        'Repaint walls in neutral colors'
                     ]
                 }
             },
-            'Maintenance': {
-                'categories': ['well-maintained', 'needs-minor-work', 'needs-major-work'],
-                'weight': 0.9,
                 'improvements': {
-                    'needs-minor-work': [
-                        'Fix minor plumbing issues',
-                        'Repair cracked tiles',
-                        'Replace worn-out fixtures'
                     ],
-                    'needs-major-work': [
-                        'Address structural issues',
-                        'Update electrical system',
-                        'Replace HVAC system'
                     ]
                 }
             },
-            'Market Appeal': {
-                'categories': ['high', 'medium', 'low'],
-                'weight': 0.8,
                 'improvements': {
-                    'medium': [
-                        'Enhance curb appeal',
-                        'Update kitchen appliances',
-                        'Add modern amenities'
                     ],
-                    'low': [
-                        'Consider price adjustment',
-                        'Improve property description',
-                        'Highlight unique features'
                     ]
                 }
             }
         }
-
         suggestions = []
         confidence_scores = []
-
-        # Analyze each aspect
         for aspect, config in base_suggestions.items():
             try:
-                # Analyze each aspect with context
-                result = classifier(suggestion_context[:1000], config['categories'])
-
-                # Get the most relevant category
-                top_category = result['labels'][0]
-                confidence = float(result['scores'][0])
-
-                # If the category indicates improvement needed (confidence < 0.6)
-                if confidence < 0.6 and top_category in config.get('improvements', {}):
-                    weighted_confidence = confidence * config['weight']
-                    for improvement in config['improvements'][top_category]:
                         suggestions.append({
-                            'aspect': aspect,
-                            'category': top_category,
-                            'suggestion': improvement,
-                            'confidence': weighted_confidence
                         })
-                        confidence_scores.append(weighted_confidence)
-            except Exception as e:
-                logger.error(f"Error analyzing aspect {aspect}: {str(e)}")
-                continue
-
-        # Sort suggestions by confidence
-        suggestions.sort(key=lambda x: x['confidence'], reverse=True)

-        # Return top 10 suggestions
-        return suggestions[:10]
-
     except Exception as e:
         logger.error(f"Error generating suggestions: {str(e)}")
-        return []

 def assess_text_quality(text):
     try:
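On both sides, generate_suggestions classifies the listing context against per-aspect category labels, and an aspect only yields suggestions when the top category's confidence falls below 0.6; that confidence is then multiplied by the aspect weight and used for ranking. The ranking step alone, under those assumptions:

```python
def rank_suggestions(candidates, top_n=10):
    """candidates: iterable of (aspect, category, suggestion, confidence, weight).
    Apply the aspect weight and return the top-N by weighted confidence."""
    ranked = [
        {'aspect': a, 'category': c, 'suggestion': s, 'confidence': conf * weight}
        for a, c, s, conf, weight in candidates
    ]
    ranked.sort(key=lambda item: item['confidence'], reverse=True)
    return ranked[:top_n]
```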
@@ -1331,8 +1297,7 @@ def verify_address(data):
             'verification_score': 0.0
         }

-        # Verify pincode
-        if data.get('zip'):
             try:
                 response = requests.get(f"https://api.postalpincode.in/pincode/{data['zip']}", timeout=5)
                 if response.status_code == 200:
@@ -1354,28 +1319,14 @@ def verify_address(data):
                 logger.error(f"Pincode API error: {str(e)}")
                 address_results['issues'].append("Pincode validation failed")

-        # Verify address with geocoding
-        full_address = ', '.join(filter(None, [
-            data.get('address', ''),
-            data.get('city', ''),
-            data.get('state', ''),
-            data.get('country', ''),
-            data.get('zip', '')
-        ]))
-
-        if full_address:
             try:
-                # Initialize geocoder if not already done
-                if not models['geocoder']:
-                    models['geocoder'] = Nominatim(user_agent="property_verifier", timeout=10)
-
-                location = models['geocoder'].geocode(full_address)
                 if location:
                     address_results['address_exists'] = True
                     address_results['confidence'] = 0.9
-
-                    # Verify coordinates if provided
-                    if data.get('latitude') and data.get('longitude'):
                         try:
                             provided_coords = (float(data['latitude']), float(data['longitude']))
                             geocoded_coords = (location.latitude, location.longitude)
@@ -1384,16 +1335,16 @@ def verify_address(data):
                             address_results['coordinates_match'] = dist < 1.0
                             if not address_results['coordinates_match']:
                                 address_results['issues'].append(f"Coordinates {dist:.2f}km off")
-                        except Exception as e:
-                            logger.error(f"Coordinate verification error: {str(e)}")
                             address_results['issues'].append("Invalid coordinates")
-                else:
-                    address_results['issues'].append("Address not found in geocoding service")
             except Exception as e:
-                logger.error(f"Geocoding error: {str(e)}")
-                address_results['issues'].append("Address geocoding failed")

-        # Calculate verification score
         verification_points = (
             address_results['address_exists'] * 0.4 +
             address_results['pincode_valid'] * 0.3 +
@@ -1404,16 +1355,9 @@ def verify_address(data):

         return address_results
     except Exception as e:
-        logger.error(f"Error in verify_address: {str(e)}")
-        return {
-            'address_exists': False,
-            'pincode_valid': False,
-            'city_state_match': False,
-            'coordinates_match': False,
-            'confidence': 0.0,
-            'issues': [f"Error during verification: {str(e)}"],
-            'verification_score': 0.0
-        }

 def perform_cross_validation(data):
     try:
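verify_address flags listings whose supplied coordinates sit more than 1 km from the geocoded address. The distance (`dist` in the hunk) is presumably geopy's geodesic distance in kilometres, along these lines:

```python
from geopy.distance import geodesic

def coords_match(provided, geocoded, max_km=1.0):
    """True when two (lat, lon) pairs are within max_km kilometres."""
    return geodesic(provided, geocoded).km < max_km

# coords_match((17.3850, 78.4867), (17.3900, 78.4900))  # nearby -> True
```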
@@ -1793,7 +1737,7 @@ def analyze_location(data):
     if data['city'] and data['state']:
         for attempt in range(3):
             try:
-                location = models['geocoder'].geocode(f"{data['city']}, {data['state']}, India")
                 if location:
                     location_quality = "verified"
                     break
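Both geocoding call sites wrap Nominatim in a three-attempt retry loop; the new code sleeps one second between attempts. A generic version of that pattern:

```python
import time
from geopy.exc import GeocoderServiceError
from geopy.geocoders import Nominatim

geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)

def geocode_with_retry(query, attempts=3, pause=1.0):
    """Return the first successful geocode result, or None after
    `attempts` tries with a short pause between them."""
    for _ in range(attempts):
        try:
            location = geocoder.geocode(query)
            if location:
                return location
        except GeocoderServiceError:
            pass  # transient failure; retry after the pause
        time.sleep(pause)
    return None
```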
@@ -2547,10 +2491,64 @@ def check_if_property_related(text):
         'confidence': 0.0
     }

 if __name__ == '__main__':
-    # Initialize models before starting the server
-    initialize_models()
-    app.run(host='0.0.0.0', port=7860, debug=False)
-else:
-    # Initialize models when running with gunicorn
-    initialize_models()
 from flask import Flask, render_template, request, jsonify
 from flask_cors import CORS
 import torch
+from transformers import pipeline, CLIPProcessor, CLIPModel, BitsAndBytesConfig
 import base64
 import io
 import re

 from functools import lru_cache
 import time
 import math
+from pyngrok import ngrok
 import threading
 import gc
+import psutil

 app = Flask(__name__)
+CORS(app)  # Enable CORS for frontend

 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
+        logging.FileHandler('app.log'),
         logging.StreamHandler()
     ]
 )
 logger = logging.getLogger(__name__)

+# Initialize geocoder
+geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
+
+# Add memory monitoring function
+def monitor_memory():
+    while True:
+        process = psutil.Process()
+        memory_info = process.memory_info()
+        logger.info(f"Memory usage: {memory_info.rss / 1024 / 1024:.2f} MB")
+        if memory_info.rss > 2 * 1024 * 1024 * 1024:  # If using more than 2GB
+            logger.warning("High memory usage detected, clearing cache")
+            clear_model_cache()
+        time.sleep(300)  # Check every 5 minutes
+
+# Start memory monitoring in a separate thread
+memory_monitor_thread = threading.Thread(target=monitor_memory, daemon=True)
+memory_monitor_thread.start()
+
+# Initialize CLIP model
+try:
+    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+    has_clip_model = True
+    logger.info("CLIP model loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading CLIP model: {str(e)}")
+    has_clip_model = False
+
+# Initialize sentence transformer
+try:
+    sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+    logger.info("Sentence transformer loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading sentence transformer: {str(e)}")
+    sentence_model = None
+
+# Initialize spaCy
+try:
+    nlp = spacy.load('en_core_web_md')
+    logger.info("spaCy model loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading spaCy model: {str(e)}")
+    nlp = None
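The new top-level setup starts a daemon thread that samples the process RSS with psutil every five minutes and calls clear_model_cache above 2 GB (the function is defined later in the file, which is fine because the thread only resolves the name at call time). The measurement itself reduces to:

```python
import psutil

def rss_mb() -> float:
    """Resident set size of the current process, in megabytes."""
    return psutil.Process().memory_info().rss / 1024 / 1024
```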

 def make_json_serializable(obj):
     try:
         'message': 'Latitude and longitude are required'
     }), 400

     # Retry geocoding up to 3 times
     for attempt in range(3):
         try:
+            location = geocoder.reverse((latitude, longitude), exactly_one=True)
             if location:
                 address_components = location.raw.get('address', {})
                 return jsonify({

 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
+        if not request.form and not request.files:
+            logger.warning("No form data or files provided")
             return jsonify({
                 'error': 'No data provided',
                 'status': 'error'
             }), 400

+        data = {
+            'property_name': request.form.get('property_name', '').strip(),
+            'property_type': request.form.get('property_type', '').strip(),
+            'status': request.form.get('status', '').strip(),
+            'description': request.form.get('description', '').strip(),
+            'address': request.form.get('address', '').strip(),
+            'city': request.form.get('city', '').strip(),
+            'state': request.form.get('state', '').strip(),
+            'country': request.form.get('country', 'India').strip(),
+            'zip': request.form.get('zip', '').strip(),
+            'latitude': request.form.get('latitude', '').strip(),
+            'longitude': request.form.get('longitude', '').strip(),
+            'bedrooms': request.form.get('bedrooms', '').strip(),
+            'bathrooms': request.form.get('bathrooms', '').strip(),
+            'total_rooms': request.form.get('total_rooms', '').strip(),
+            'year_built': request.form.get('year_built', '').strip(),
+            'parking': request.form.get('parking', '').strip(),
+            'sq_ft': request.form.get('sq_ft', '').strip(),
+            'market_value': request.form.get('market_value', '').strip(),
+            'amenities': request.form.get('amenities', '').strip(),
+            'nearby_landmarks': request.form.get('nearby_landmarks', '').strip(),
+            'legal_details': request.form.get('legal_details', '').strip()
         }

+        required_fields = ['property_name', 'property_type', 'address', 'city', 'state']
+        missing_fields = [field for field in required_fields if not data[field]]
+        if missing_fields:
+            logger.warning(f"Missing required fields: {', '.join(missing_fields)}")
+            return jsonify({
+                'error': f"Missing required fields: {', '.join(missing_fields)}",
+                'status': 'error'
+            }), 400
+
+        images = []
+        image_analysis = []
         if 'images' in request.files:
             image_files = request.files.getlist('images')
+            for img_file in image_files:
+                if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
+                    try:
+                        img = Image.open(img_file)
+                        buffered = io.BytesIO()
+                        img.save(buffered, format="JPEG")
+                        img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+                        images.append(img_str)
+                        image_analysis.append(analyze_image(img))
+                    except Exception as e:
+                        logger.error(f"Error processing image {img_file.filename}: {str(e)}")
+                        image_analysis.append({'error': str(e), 'is_property_related': False})
+
+        pdf_texts = []
+        pdf_analysis = []
         if 'documents' in request.files:
             pdf_files = request.files.getlist('documents')
+            for pdf_file in pdf_files:
+                if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
+                    try:
+                        pdf_text = extract_pdf_text(pdf_file)
+                        pdf_texts.append({
+                            'filename': pdf_file.filename,
+                            'text': pdf_text
+                        })
+                        pdf_analysis.append(analyze_pdf_content(pdf_text, data))
+                    except Exception as e:
+                        logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
+                        pdf_analysis.append({'error': str(e)})

         consolidated_text = f"""
         Property Name: {data['property_name']}
         Property Type: {data['property_type']}

         Legal Details: {data['legal_details']}
         """

         try:
+            description = data['description']
+            if description and len(description) > 10:
+                text_language = detect(description)
+                if text_language != 'en':
+                    translated_description = GoogleTranslator(source=text_language, target='en').translate(description)
+                    data['description_translated'] = translated_description
                 else:
+                    data['description_translated'] = description
             else:
+                data['description_translated'] = description
         except Exception as e:
+            logger.error(f"Error in language detection/translation: {str(e)}")
+            data['description_translated'] = data['description']
+
+        summary = generate_property_summary(data)
+        fraud_classification = classify_fraud(consolidated_text, data)
+        trust_score, trust_reasoning = generate_trust_score(consolidated_text, image_analysis, pdf_analysis)
+        suggestions = generate_suggestions(consolidated_text, data)
+        quality_assessment = assess_text_quality(data['description_translated'])
+        address_verification = verify_address(data)
+        cross_validation = perform_cross_validation(data)
+        location_analysis = analyze_location(data)
+        price_analysis = analyze_price(data)
+        legal_analysis = analyze_legal_details(data['legal_details'])
+        specs_verification = verify_property_specs(data)
+        market_analysis = analyze_market_value(data)
+
+        document_analysis = {
+            'pdf_count': len(pdf_texts),
+            'pdf_texts': pdf_texts,
+            'pdf_analysis': pdf_analysis
+        }
+        image_results = {
+            'image_count': len(images),
+            'image_analysis': image_analysis
+        }
+
+        report_id = str(uuid.uuid4())

+        results = {
+            'report_id': report_id,
+            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            'summary': summary,
+            'fraud_classification': fraud_classification,
+            'trust_score': {
+                'score': trust_score,
+                'reasoning': trust_reasoning
+            },
+            'suggestions': suggestions,
+            'quality_assessment': quality_assessment,
+            'address_verification': address_verification,
+            'cross_validation': cross_validation,
+            'location_analysis': location_analysis,
+            'price_analysis': price_analysis,
+            'legal_analysis': legal_analysis,
+            'document_analysis': document_analysis,
+            'image_analysis': image_results,
+            'specs_verification': specs_verification,
+            'market_analysis': market_analysis,
+            'images': images
+        }

         return jsonify(make_json_serializable(results))

     except Exception as e:
         logger.error(f"Error in verify_property: {str(e)}")
         return jsonify({
             'error': 'Server error occurred. Please try again later.',
             'status': 'error',
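The new handler detects the description's language and translates non-English text before quality assessment; `detect` and `GoogleTranslator` presumably come from langdetect and deep-translator. The logic, condensed into one helper:

```python
from deep_translator import GoogleTranslator
from langdetect import detect

def to_english(text: str) -> str:
    """Translate `text` to English unless it is too short or already English."""
    if not text or len(text) <= 10:
        return text
    lang = detect(text)  # e.g. 'hi' for Hindi
    if lang == 'en':
        return text
    return GoogleTranslator(source=lang, target='en').translate(text)
```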

 def analyze_image(image):
     try:
+        if has_clip_model:
             img_rgb = image.convert('RGB')
+            inputs = clip_processor(
                 text=[
                     "real estate property interior",
                     "real estate property exterior",

                 return_tensors="pt",
                 padding=True
             )
+            outputs = clip_model(**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1).detach().numpy()[0]

 def check_document_consistency(document_text, property_data):
     try:
+        if not sentence_model:
             logger.warning("Sentence model unavailable")
             return 0.5
         property_text = ' '.join([

             'state', 'market_value', 'sq_ft', 'bedrooms'
         ]
         ])
+        property_embedding = sentence_model.encode(property_text)
+        document_embedding = sentence_model.encode(document_text[:1000])
         similarity = util.cos_sim(property_embedding, document_embedding)[0][0].item()
         return max(0.0, min(1.0, float(similarity)))
     except Exception as e:

         return {}

 def generate_property_summary(data):
     try:
         # Create a detailed context for summary generation
+        property_context = f"""
+        Property Name: {data.get('property_name', '')}
+        Type: {data.get('property_type', '')}
+        Status: {data.get('status', '')}
+        Location: {data.get('address', '')}, {data.get('city', '')}, {data.get('state', '')}, {data.get('country', '')}
+        Size: {data.get('sq_ft', '')} sq. ft.
+        Price: {data.get('market_value', '0')}
+        Bedrooms: {data.get('bedrooms', '')}
+        Bathrooms: {data.get('bathrooms', '')}
+        Year Built: {data.get('year_built', '')}
+        Description: {data.get('description', '')}
         """
+
+        # Use BART for summary generation
+        summarizer = load_model("summarization", "facebook/bart-large-cnn")
+
         # Generate initial summary
+        summary_result = summarizer(property_context, max_length=150, min_length=50, do_sample=False)
+        initial_summary = summary_result[0]['summary_text']

+        # Enhance summary with key features
         key_features = []
+
+        # Add property type and status
+        if data.get('property_type') and data.get('status'):
+            key_features.append(f"{data['property_type']} is {data['status'].lower()}")
+
+        # Add location if available
+        location_parts = []
+        if data.get('city'):
+            location_parts.append(data['city'])
+        if data.get('state'):
+            location_parts.append(data['state'])
+        if location_parts:
+            key_features.append(f"Located in {', '.join(location_parts)}")
+
+        # Add size and price if available
+        if data.get('sq_ft'):
+            key_features.append(f"Spans {data['sq_ft']} sq. ft.")
+        if data.get('market_value'):
+            key_features.append(f"Valued at ₹{data['market_value']}")
+
+        # Add rooms information
+        rooms_info = []
+        if data.get('bedrooms'):
+            rooms_info.append(f"{data['bedrooms']} bedroom{'s' if data['bedrooms'] != '1' else ''}")
+        if data.get('bathrooms'):
+            rooms_info.append(f"{data['bathrooms']} bathroom{'s' if data['bathrooms'] != '1' else ''}")
+        if rooms_info:
+            key_features.append(f"Features {' and '.join(rooms_info)}")
+
+        # Add amenities if available
         if data.get('amenities'):
             key_features.append(f"Amenities: {data['amenities']}")
+
+        # Combine initial summary with key features
+        enhanced_summary = initial_summary
         if key_features:
+            enhanced_summary += " " + ". ".join(key_features) + "."
+
+        # Clean up the summary
+        enhanced_summary = enhanced_summary.replace("  ", " ").strip()

         return enhanced_summary
     except Exception as e:
         logger.error(f"Error generating property summary: {str(e)}")
+        return "Could not generate summary."

 def summarize_text(text):
     try:

         logger.error(f"Error summarizing text: {str(e)}")
         return text[:200] + "..." if len(text) > 200 else text

+def classify_fraud(text, data=None):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         categories = [
             "suspicious pricing pattern",
             "potentially fraudulent listing",

         - Name: {data.get('property_name', 'Not provided')}
         - Type: {data.get('property_type', 'Not provided')}
         - Status: {data.get('property_status', 'Not provided')}
+        - Price: {data.get('market_value', 'Not provided')}
+        - Square Footage: {data.get('square_footage', 'Not provided')}
         - Year Built: {data.get('year_built', 'Not provided')}
         - Location: {data.get('address', 'Not provided')}
         - Description: {text}

                 high_risk.append((label, score))
             elif score > 0.5:
                 medium_risk.append((label, score))
+            else:
                 low_risk.append((label, score))

         # Calculate alert score with adjusted weights

         else:
             alert_level = 'minimal'

+        # Enhanced fraud indicators with more specific patterns
+        fraud_indicators = []
+
+        # Price-related patterns
+        price_patterns = [
+            (r'suspiciously low price', 0.8),
+            (r'unusually high price', 0.7),
+            (r'price too good to be true', 0.9),
+            (r'urgent sale', 0.6),
+            (r'must sell quickly', 0.7)
+        ]
+
+        # Location-related patterns
+        location_patterns = [
+            (r'location mismatch', 0.8),
+            (r'address inconsistency', 0.7),
+            (r'wrong neighborhood', 0.6),
+            (r'incorrect zip code', 0.7)
+        ]
+
+        # Document-related patterns
+        document_patterns = [
+            (r'missing documents', 0.8),
+            (r'unverified documents', 0.7),
+            (r'fake documents', 0.9),
+            (r'photoshopped documents', 0.8)
+        ]
+
+        # Urgency-related patterns
+        urgency_patterns = [
+            (r'act now', 0.6),
+            (r'limited time offer', 0.5),
+            (r'first come first served', 0.4),
+            (r'won\'t last long', 0.5)
+        ]
+
+        # Check all patterns
+        all_patterns = price_patterns + location_patterns + document_patterns + urgency_patterns
+        for pattern, weight in all_patterns:
+            if re.search(pattern, text.lower()):
+                fraud_indicators.append({
+                    'pattern': pattern,
+                    'weight': weight,
+                    'context': text[max(0, text.lower().find(pattern)-50):min(len(text), text.lower().find(pattern)+50)]
+                })
+
+        # Additional checks for data inconsistencies
+        if data:
+            # Check for suspiciously low price per square foot
+            try:
+                price = float(data.get('market_value', 0))
+                sqft = float(data.get('square_footage', 1))
+                price_per_sqft = price / sqft
+                if price_per_sqft < 50:  # Unusually low price per square foot
+                    fraud_indicators.append({
+                        'pattern': 'suspiciously low price per square foot',
+                        'weight': 0.8,
+                        'context': f'Price per square foot: ${price_per_sqft:.2f}'
+                    })
+            except (ValueError, ZeroDivisionError):
+                pass
+
+            # Check for impossible values
+            try:
+                year_built = int(data.get('year_built', 0))
+                if year_built < 1800 or year_built > 2024:
+                    fraud_indicators.append({
+                        'pattern': 'impossible year built',
+                        'weight': 0.9,
+                        'context': f'Year built: {year_built}'
+                    })
+            except ValueError:
+                pass
+
+            # Check for missing critical information
+            critical_fields = ['property_name', 'property_type', 'address', 'market_value', 'square_footage']
+            missing_fields = [field for field in critical_fields if not data.get(field)]
+            if missing_fields:
+                fraud_indicators.append({
+                    'pattern': 'missing critical information',
+                    'weight': 0.7,
+                    'context': f'Missing fields: {", ".join(missing_fields)}'
+                })
+
         return {
             'alert_level': alert_level,
             'alert_score': alert_score,
             'high_risk': high_risk,
             'medium_risk': medium_risk,
+            'low_risk': low_risk,
+            'fraud_indicators': fraud_indicators
         }
     except Exception as e:
         logger.error(f"Error in fraud classification: {str(e)}")

         'alert_score': 1.0,
         'high_risk': [],
         'medium_risk': [],
+        'low_risk': [],
+        'fraud_indicators': []
     }
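The new fraud_indicators pass calls `text.lower().find(pattern)` twice per hit to rebuild the context window. An equivalent sketch using the match span from a single `re.search`, which avoids the repeated scans:

```python
import re

def scan_patterns(text, patterns, window=50):
    """Return matched (pattern, weight) pairs with surrounding context.
    `patterns` is a list of (regex, weight) tuples."""
    hits = []
    lowered = text.lower()
    for pattern, weight in patterns:
        match = re.search(pattern, lowered)
        if match:
            start, end = match.span()
            hits.append({
                'pattern': pattern,
                'weight': weight,
                'context': text[max(0, start - window):min(len(text), end + window)],
            })
    return hits
```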

 def generate_trust_score(text, image_analysis, pdf_analysis):

         logger.error(f"Error generating trust score: {str(e)}")
         return 20, "Could not assess trust."

+def generate_suggestions(text, data=None):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
+
+        # Create comprehensive context for analysis
+        suggestion_context = text
+        if data:
+            suggestion_context += f"""
+            Additional Context:
+            Property Type: {data.get('property_type', '')}
+            Location: {data.get('city', '')}, {data.get('state', '')}
+            Size: {data.get('sq_ft', '')} sq.ft.
+            Year Built: {data.get('year_built', '')}
+            """
+
+        # Enhanced suggestion categories based on property context
         base_suggestions = {
+            'documentation': {
+                'label': "add more documentation",
+                'categories': [
+                    "complete documentation provided",
+                    "missing essential documents",
+                    "incomplete paperwork",
+                    "documentation needs verification"
+                ],
+                'weight': 2.0,
+                'improvements': {
+                    'missing essential documents': [
+                        "Add property deed or title documents",
+                        "Include recent property tax records",
+                        "Attach property registration documents"
+                    ],
+                    'incomplete paperwork': [
+                        "Complete all required legal documents",
+                        "Add missing ownership proof",
+                        "Include property survey documents"
+                    ]
+                }
+            },
+            'details': {
+                'label': "enhance property details",
+                'categories': [
+                    "detailed property information",
+                    "basic information only",
+                    "missing key details",
+                    "comprehensive description"
+                ],
+                'weight': 1.8,
+                'improvements': {
+                    'basic information only': [
+                        "Add more details about property features",
+                        "Include information about recent renovations",
+                        "Describe unique selling points"
+                    ],
+                    'missing key details': [
+                        "Specify exact built-up area",
+                        "Add floor plan details",
+                        "Include maintenance costs"
+                    ]
+                }
+            },
+            'images': {
+                'label': "improve visual content",
+                'categories': [
+                    "high quality images provided",
+                    "poor image quality",
+                    "insufficient images",
+                    "missing key area photos"
+                ],
+                'weight': 1.5,
                 'improvements': {
+                    'poor image quality': [
+                        "Add high-resolution property photos",
+                        "Include better lighting in images",
+                        "Provide professional photography"
                     ],
+                    'insufficient images': [
+                        "Add more interior photos",
+                        "Include exterior and surrounding area images",
+                        "Add photos of amenities"
                     ]
                 }
             },
+            'pricing': {
+                'label': "pricing information",
+                'categories': [
+                    "detailed pricing breakdown",
+                    "basic price only",
+                    "missing price details",
+                    "unclear pricing terms"
+                ],
+                'weight': 1.7,
                 'improvements': {
+                    'basic price only': [
+                        "Add detailed price breakdown",
+                        "Include maintenance charges",
+                        "Specify additional costs"
                     ],
+                    'missing price details': [
+                        "Add price per square foot",
+                        "Include tax implications",
+                        "Specify payment terms"
                     ]
                 }
             },
+            'location': {
+                'label': "location details",
+                'categories': [
+                    "comprehensive location info",
+                    "basic location only",
+                    "missing location details",
+                    "unclear accessibility info"
+                ],
+                'weight': 1.6,
                 'improvements': {
+                    'basic location only': [
+                        "Add nearby landmarks and distances",
+                        "Include transportation options",
+                        "Specify neighborhood facilities"
                     ],
+                    'missing location details': [
+                        "Add exact GPS coordinates",
+                        "Include area development plans",
+                        "Specify distance to key facilities"
                     ]
                 }
             }
         }
+
         suggestions = []
         confidence_scores = []
+
         for aspect, config in base_suggestions.items():
+            # Analyze each aspect with context
+            result = classifier(suggestion_context[:1000], config['categories'])
+
+            # Get the most relevant category
+            top_category = result['labels'][0]
+            confidence = float(result['scores'][0])
+
+            # If the category indicates improvement needed (confidence < 0.6)
+            if confidence < 0.6 and top_category in config['improvements']:
+                weighted_confidence = confidence * config['weight']
+                for improvement in config['improvements'][top_category]:
+                    suggestions.append({
+                        'aspect': aspect,
+                        'category': top_category,
+                        'suggestion': improvement,
+                        'confidence': weighted_confidence
+                    })
+                    confidence_scores.append(weighted_confidence)
+
+        # Sort suggestions by confidence and priority
+        suggestions.sort(key=lambda x: x['confidence'], reverse=True)
+
+        # Property type specific suggestions
+        if data and data.get('property_type'):
+            property_type = data['property_type'].lower()
+            type_specific_suggestions = {
+                'residential': [
+                    "Add information about school districts",
+                    "Include details about neighborhood safety",
+                    "Specify parking arrangements"
+                ],
+                'commercial': [
+                    "Add foot traffic statistics",
+                    "Include zoning information",
+                    "Specify business licenses required"
+                ],
+                'industrial': [
+                    "Add power supply specifications",
+                    "Include environmental clearances",
+                    "Specify loading/unloading facilities"
+                ],
+                'land': [
+                    "Add soil testing reports",
+                    "Include development potential analysis",
+                    "Specify available utilities"
+                ]
+            }
+
+            for type_key, type_suggestions in type_specific_suggestions.items():
+                if type_key in property_type:
+                    for suggestion in type_suggestions:
+                        suggestions.append({
+                            'aspect': 'property_type_specific',
+                            'category': 'type_specific_requirements',
+                            'suggestion': suggestion,
+                            'confidence': 0.8  # High confidence for type-specific suggestions
+                        })
+
+        # Add market-based suggestions
+        if data and data.get('market_value'):
             try:
+                market_value = float(data['market_value'].replace('₹', '').replace(',', ''))
+                if market_value > 10000000:  # High-value property
+                    premium_suggestions = [
+                        "Add virtual tour of the property",
+                        "Include detailed investment analysis",
+                        "Provide historical price trends"
+                    ]
+                    for suggestion in premium_suggestions:
                         suggestions.append({
+                            'aspect': 'premium_property',
+                            'category': 'high_value_requirements',
+                            'suggestion': suggestion,
+                            'confidence': 0.9
                         })
+            except ValueError:
+                pass

+        # Calculate overall completeness score
+        completeness_score = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0
+        completeness_score = min(100, max(0, completeness_score * 100))
+
+        return {
+            'suggestions': suggestions[:10],  # Return top 10 suggestions
+            'completeness_score': completeness_score,
+            'priority_aspects': [s['aspect'] for s in suggestions[:3]],
+            'improvement_summary': f"Focus on improving {', '.join([s['aspect'] for s in suggestions[:3]])}",
+            'total_suggestions': len(suggestions)
+        }
     except Exception as e:
         logger.error(f"Error generating suggestions: {str(e)}")
+        return {
+            'suggestions': [
+                {
+                    'aspect': 'general',
+                    'category': 'basic_requirements',
+                    'suggestion': 'Please provide more property details',
+                    'confidence': 0.5
+                }
+            ],
+            'completeness_score': 0,
+            'priority_aspects': ['general'],
+            'improvement_summary': "Add basic property information",
+            'total_suggestions': 1
+        }
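generate_suggestions now returns a dict rather than a bare list, including a completeness score taken from the mean weighted confidence, scaled to 0-100 and clamped. In isolation:

```python
confidence_scores = [0.9, 0.5, 0.4]  # hypothetical weighted confidences
score = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0
score = min(100, max(0, score * 100))  # -> 60.0
```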

 def assess_text_quality(text):
     try:

             'verification_score': 0.0
         }

+        if data['zip']:
             try:
                 response = requests.get(f"https://api.postalpincode.in/pincode/{data['zip']}", timeout=5)
                 if response.status_code == 200:

                 logger.error(f"Pincode API error: {str(e)}")
                 address_results['issues'].append("Pincode validation failed")

+        full_address = ', '.join(filter(None, [data['address'], data['city'], data['state'], data['country'], data['zip']]))
+        for attempt in range(3):
             try:
+                location = geocoder.geocode(full_address)
                 if location:
                     address_results['address_exists'] = True
                     address_results['confidence'] = 0.9
+                    if data['latitude'] and data['longitude']:
                         try:
                             provided_coords = (float(data['latitude']), float(data['longitude']))
                             geocoded_coords = (location.latitude, location.longitude)

                             address_results['coordinates_match'] = dist < 1.0
                             if not address_results['coordinates_match']:
                                 address_results['issues'].append(f"Coordinates {dist:.2f}km off")
+                        except:
                             address_results['issues'].append("Invalid coordinates")
+                    break
+                time.sleep(1)
             except Exception as e:
+                logger.error(f"Geocoding error on attempt {attempt + 1}: {str(e)}")
+                time.sleep(1)
+        else:
+            address_results['issues'].append("Address geocoding failed")

         verification_points = (
             address_results['address_exists'] * 0.4 +
             address_results['pincode_valid'] * 0.3 +

         return address_results
     except Exception as e:
+        logger.error(f"Error verifying address: {str(e)}")
+        address_results['issues'].append(str(e))
+        return address_results

 def perform_cross_validation(data):
     try:

     if data['city'] and data['state']:
         for attempt in range(3):
             try:
+                location = geocoder.geocode(f"{data['city']}, {data['state']}, India")
                 if location:
                     location_quality = "verified"
                     break

         'confidence': 0.0
     }

+# Update the load_model function to use memory optimizations
+@lru_cache(maxsize=3)  # Limit cache size
+def load_model(task, model_name):
+    try:
+        logger.info(f"Loading model: {model_name} for task: {task}")
+
+        # Use smaller, more efficient models
+        if task == "zero-shot-classification":
+            # Use smaller model for zero-shot classification
+            model_name = "facebook/bart-large-mnli"  # ~1.6GB
+            return pipeline(task, model=model_name, device=-1)
+        elif task == "summarization":
+            # Use smaller model for summarization
+            model_name = "facebook/bart-large-cnn"  # ~1.6GB
+            return pipeline(task, model=model_name, device=-1)
+        elif task == "text-classification":
+            # Use very small model for text classification
+            model_name = "distilbert-base-uncased"  # ~260MB
+            return pipeline(task, model=model_name, device=-1)
+        elif task == "feature-extraction":
+            # Use small model for feature extraction
+            model_name = "sentence-transformers/all-MiniLM-L6-v2"  # ~80MB
+            return pipeline(task, model=model_name, device=-1)
+        else:
+            # Default to small model for unknown tasks
+            model_name = "distilbert-base-uncased"
+            return pipeline(task, model=model_name, device=-1)
+    except Exception as e:
+        logger.error(f"Error loading model {model_name}: {str(e)}")
+        raise
+
+# Add memory cleanup function
+def clear_model_cache():
+    """Clear model cache and free up memory"""
+    load_model.cache_clear()
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    logger.info("Model cache cleared and memory freed")

 if __name__ == '__main__':
+    # Set up ngrok
+    http_tunnel = ngrok.connect(5000)
+    print(f' * Public URL: {http_tunnel.public_url}')
+
+    # Run Flask app in a separate thread
+    def run_flask():
+        app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False)
+
+    flask_thread = threading.Thread(target=run_flask)
+    flask_thread.daemon = True
+    flask_thread.start()
+
+    try:
+        # Keep the main thread running
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        print(" * Shutting down server...")
+        ngrok.disconnect(http_tunnel.public_url)
+        ngrok.kill()