Spaces:

sksameermujahid
/

property_verification

Sleeping

App Files Community

sksameermujahid committed on Apr 24

Commit

b6fdb69

verified ·

1 Parent(s): 1d2d77d

Update app.py

Browse files

Files changed (1) hide show

app.py +842 -488

app.py CHANGED Viewed

@@ -27,40 +27,24 @@ from pyngrok import ngrok
 import threading
 import asyncio
 import concurrent.futures
-from concurrent.futures import ThreadPoolExecutor
 app = Flask(__name__)
-CORS(app)
 # Configure logging
-log_dir = '/app/logs'
-os.makedirs(log_dir, exist_ok=True)
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     handlers=[
-        logging.StreamHandler(),
-        logging.FileHandler(os.path.join(log_dir, 'app.log'))
     ]
 )
 logger = logging.getLogger(__name__)
-# Set Hugging Face cache directory
-os.environ['TRANSFORMERS_CACHE'] = '/app/cache'
-os.environ['HF_HOME'] = '/app/cache'
-os.environ['XDG_CACHE_HOME'] = '/app/cache'
 # Initialize geocoder
 geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
-# Model instances
-clip_processor = None
-clip_model = None
-sentence_model = None
-nlp = None
-zero_shot_classifier = None
 # Cache models
 @lru_cache(maxsize=10)
 def load_model(task, model_name):
@@ -71,46 +55,31 @@ def load_model(task, model_name):
         logger.error(f"Error loading model {model_name}: {str(e)}")
         raise
-def get_clip_model():
-    global clip_processor, clip_model
-    if clip_processor is None or clip_model is None:
-        try:
-            clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
-            clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
-            logger.info("CLIP model loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading CLIP model: {str(e)}")
-    return clip_processor, clip_model
-def get_sentence_model():
-    global sentence_model
-    if sentence_model is None:
-        try:
-            sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
-            logger.info("Sentence transformer loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading sentence transformer: {str(e)}")
-    return sentence_model
-def get_spacy_model():
-    global nlp
-    if nlp is None:
-        try:
-            nlp = spacy.load('en_core_web_sm')
-            logger.info("spaCy model loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading spaCy model: {str(e)}")
-    return nlp
-def get_zero_shot_classifier():
-    global zero_shot_classifier
-    if zero_shot_classifier is None:
-        try:
-            zero_shot_classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
-            logger.info("Zero-shot classifier loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading zero-shot classifier: {str(e)}")
-    return zero_shot_classifier
 def make_json_serializable(obj):
     try:
@@ -126,8 +95,6 @@ def make_json_serializable(obj):
             return obj.item() if hasattr(obj, 'item') else float(obj)
         elif isinstance(obj, np.ndarray):
             return obj.tolist()
-        elif isinstance(obj, np.bool_):
-            return bool(obj)
         else:
             return str(obj)
     except Exception as e:
@@ -233,232 +200,372 @@ def get_location():
         }), 500
 def calculate_final_verdict(results):
     try:
         # Initialize verdict components
         verdict = {
             'status': 'unknown',
-            'score': 0.0,
             'confidence': 0.0,
             'reasons': [],
-            'warnings': [],
             'critical_issues': [],
             'recommendations': []
         }
-        # Calculate base score from different analyses
-        scores = []
-        weights = []
-        # Location analysis
-        if 'location_analysis' in results:
-            loc_score = results['location_analysis'].get('completeness_score', 0)
-            scores.append(loc_score)
-            weights.append(0.3)
-        # Price analysis
-        if 'price_analysis' in results:
-            price_conf = results['price_analysis'].get('confidence', 0)
-            scores.append(price_conf * 100)
-            weights.append(0.2)
-        # Legal analysis
-        if 'legal_analysis' in results:
-            legal_score = results['legal_analysis'].get('completeness_score', 0)
-            scores.append(legal_score)
-            weights.append(0.3)
-        # Specs analysis
-        if 'specs_analysis' in results:
-            specs_score = results['specs_analysis'].get('verification_score', 0)
-            scores.append(specs_score)
-            weights.append(0.2)
-        # Calculate weighted average score
-        if scores and weights:
-            verdict['score'] = sum(s * w for s, w in zip(scores, weights)) / sum(weights)
-            verdict['confidence'] = min(1.0, len(scores) / 4.0)  # Confidence based on available analyses
-        # Determine status based on score
-        if verdict['score'] >= 80:
-            verdict['status'] = 'verified'
-        elif verdict['score'] >= 60:
-            verdict['status'] = 'partially_verified'
         else:
-            verdict['status'] = 'unverified'
-        # Add reasons and recommendations
-        if 'location_analysis' in results:
-            verdict['reasons'].append(f"Location verification: {results['location_analysis'].get('assessment', 'unknown')}")
-            if results['location_analysis'].get('location_quality') != 'verified':
-                verdict['warnings'].append("Location details need verification")
-        if 'price_analysis' in results:
-            verdict['reasons'].append(f"Price assessment: {results['price_analysis'].get('assessment', 'unknown')}")
-            if results['price_analysis'].get('assessment') == 'suspiciously high price':
-                verdict['warnings'].append("Property price seems unusually high for the area")
-        if 'legal_analysis' in results:
-            verdict['reasons'].append(f"Legal assessment: {results['legal_analysis'].get('assessment', 'unknown')}")
-            if results['legal_analysis'].get('potential_issues'):
-                verdict['critical_issues'].append("Potential legal issues detected")
-        if 'specs_analysis' in results:
-            verdict['reasons'].append(f"Specifications verification: {'valid' if results['specs_analysis'].get('is_valid') else 'invalid'}")
-            if not results['specs_analysis'].get('is_valid'):
-                verdict['warnings'].extend(results['specs_analysis'].get('issues', []))
-        # Add recommendations
-        if verdict['status'] == 'unverified':
-            verdict['recommendations'].append("Additional verification required")
-        if verdict['warnings']:
-            verdict['recommendations'].append("Address the warnings before proceeding")
-        if verdict['critical_issues']:
-            verdict['recommendations'].append("Resolve critical issues before proceeding")
         return verdict
     except Exception as e:
         logger.error(f"Error calculating final verdict: {str(e)}")
         return {
             'status': 'error',
-            'score': 0.0,
             'confidence': 0.0,
             'reasons': [f"Error calculating verdict: {str(e)}"],
-            'warnings': [],
             'critical_issues': [],
             'recommendations': ["Unable to determine property status due to an error"]
         }
 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
-        results = {
-            'location_analysis': {},
-            'price_analysis': {},
-            'specifications_analysis': {},
-            'legal_details_analysis': {},
-            'image_analysis': [],
-            'pdf_analysis': [],
-            'final_verdict': {},
-            'suggestions': []
-        }
-        # Check if request is JSON
-        if request.is_json:
-            data = request.get_json()
-        else:
-            data = {
-                'property_name': request.form.get('property_name', '').strip(),
-                'property_type': request.form.get('property_type', '').strip(),
-                'status': request.form.get('status', '').strip(),
-                'address': request.form.get('address', '').strip(),
-                'city': request.form.get('city', '').strip(),
-                'state': request.form.get('state', '').strip(),
-                'country': request.form.get('country', 'India').strip(),
-                'pincode': request.form.get('pincode', '').strip(),
-                'price': request.form.get('price', '').strip(),
-                'market_value': request.form.get('market_value', '').strip(),
-                'area': request.form.get('area', '').strip(),
-                'bedrooms': request.form.get('bedrooms', '').strip(),
-                'bathrooms': request.form.get('bathrooms', '').strip(),
-                'floors': request.form.get('floors', '').strip(),
-                'furnishing': request.form.get('furnishing', '').strip(),
-                'parking': request.form.get('parking', '').strip(),
-                'description': request.form.get('description', '').strip(),
-                'legal_status': request.form.get('legal_status', '').strip(),
-                'possession': request.form.get('possession', '').strip(),
-                'age': request.form.get('age', '').strip(),
-                'facing': request.form.get('facing', '').strip(),
-                'amenities': request.form.get('amenities', '').strip()
-            }
         # Validate required fields
         required_fields = ['property_name', 'property_type', 'address', 'city', 'state']
-        missing_fields = [field for field in required_fields if not data.get(field)]
         if missing_fields:
             return jsonify({
-                'error': 'Missing required fields',
-                'missing_fields': missing_fields
             }), 400
-        # Process images and PDFs in parallel
-        with ThreadPoolExecutor(max_workers=4) as executor:
-            # Process images
-            image_files = request.files.getlist('images')
-            if image_files:
-                image_futures = []
-                for img_file in image_files:
-                    future = executor.submit(process_image, img_file)
-                    image_futures.append(future)
-                for future in concurrent.futures.as_completed(image_futures):
                     try:
-                        result = future.result()
-                        results['image_analysis'].append(result)
                     except Exception as e:
-                        logger.error(f"Error processing image: {str(e)}")
-                        results['image_analysis'].append({'error': str(e), 'is_property_related': False})
-            # Process PDFs
-            pdf_files = request.files.getlist('documents')
-            if pdf_files:
-                pdf_futures = []
-                for pdf_file in pdf_files:
-                    future = executor.submit(process_pdf, pdf_file, data)
-                    pdf_futures.append(future)
-                for future in concurrent.futures.as_completed(pdf_futures):
                     try:
-                        result = future.result()
-                        results['pdf_analysis'].append(result)
                     except Exception as e:
-                        logger.error(f"Error processing PDF: {str(e)}")
-                        results['pdf_analysis'].append({'error': str(e)})
-        # Run analysis tasks in parallel
-        with ThreadPoolExecutor(max_workers=4) as executor:
-            futures = {
-                'location_analysis': executor.submit(analyze_location, data),
-                'price_analysis': executor.submit(analyze_price, data),
-                'specifications_analysis': executor.submit(verify_property_specs, data),
-                'legal_details_analysis': executor.submit(analyze_legal_details, data.get('legal_status', ''))
-            }
-            for key, future in futures.items():
-                try:
-                    results[key] = future.result()
-                except Exception as e:
-                    logger.error(f"Error in {key}: {str(e)}")
-                    results[key] = {'error': str(e)}
-        # Calculate final verdict and suggestions
-        results['final_verdict'] = calculate_final_verdict(results)
-        results['suggestions'] = generate_suggestions(data.get('description', ''), data)
-        # Ensure all results are JSON serializable
-        serializable_results = make_json_serializable(results)
-        return jsonify(serializable_results)
-    except Exception as e:
-        logger.error(f"Error in verify_property: {str(e)}")
-        return jsonify({'error': str(e)}), 500
-def process_image(img_file):
-    try:
-        img = Image.open(img_file)
-        buffered = io.BytesIO()
-        img.save(buffered, format="JPEG")
-        img_str = base64.b64encode(buffered.getvalue()).decode()
-        return analyze_image(img)
-    except Exception as e:
-        raise Exception(f"Error processing image {img_file.filename}: {str(e)}")
-def process_pdf(pdf_file, data):
-    try:
-        pdf_text = extract_pdf_text(pdf_file)
-        return analyze_pdf_content(pdf_text, data)
     except Exception as e:
-        raise Exception(f"Error processing PDF {pdf_file.filename}: {str(e)}")
 def extract_pdf_text(pdf_file):
     try:
@@ -474,8 +581,7 @@ def extract_pdf_text(pdf_file):
 def analyze_image(image):
     try:
-        if clip_processor is None or clip_model is None:
-            get_clip_model()
             img_rgb = image.convert('RGB')
             inputs = clip_processor(
                 text=[
@@ -511,6 +617,16 @@ def analyze_image(image):
                 'is_ai_generated': is_ai_generated,
                 'authenticity_score': 0.95 if not is_ai_generated else 0.60
             }
     except Exception as e:
         logger.error(f"Error analyzing image: {str(e)}")
         return {
@@ -542,120 +658,113 @@ def detect_ai_generated_image(image):
 def analyze_pdf_content(document_text, property_data):
     try:
-        if not document_text or len(document_text.strip()) < 10:
             return {
-                'document_type': 'unknown',
-                'confidence': 0.0,
                 'key_info': {},
-                'summary': 'No text content found in document',
                 'consistency_score': 0.0,
-                'risk_indicators': []
             }
-        # Use sentence transformer for document classification
-        sentence_model = get_sentence_model()
-        # Define document types
-        document_types = [
-            "sale deed",
-            "property tax receipt",
-            "encumbrance certificate",
-            "building approval",
-            "occupancy certificate",
-            "power of attorney",
-            "lease agreement",
-            "will",
-            "gift deed",
-            "partition deed"
-        ]
-        # Convert document types to embeddings
-        type_embeddings = sentence_model.encode(document_types)
-        doc_embedding = sentence_model.encode(document_text)
-        # Calculate similarities
-        similarities = util.pytorch_cos_sim(doc_embedding, type_embeddings)[0]
-        doc_type_idx = similarities.argmax().item()
-        doc_type = document_types[doc_type_idx]
-        confidence = float(similarities[doc_type_idx])
-        # Extract key information
         key_info = extract_document_key_info(document_text)
-        # Generate summary
-        summary = summarize_text(document_text)
-        # Check consistency with property data
         consistency_score = check_document_consistency(document_text, property_data)
-        # Identify risk indicators
-        risk_indicators = []
-        if consistency_score < 0.7:
-            risk_indicators.append("Document content inconsistent with property details")
-        if confidence < 0.6:
-            risk_indicators.append("Low confidence in document type identification")
-        if len(key_info) < 3:
-            risk_indicators.append("Limited key information extracted")
         return {
-            'document_type': doc_type,
-            'confidence': confidence,
             'key_info': key_info,
             'summary': summary,
-            'consistency_score': consistency_score,
-            'risk_indicators': risk_indicators
         }
     except Exception as e:
         logger.error(f"Error analyzing PDF content: {str(e)}")
         return {
-            'document_type': 'error',
-            'confidence': 0.0,
             'key_info': {},
-            'summary': f'Error analyzing document: {str(e)}',
             'consistency_score': 0.0,
-            'risk_indicators': ['Error in document analysis']
         }
-def summarize_text(text):
-    try:
-        if not text or len(text.strip()) < 10:
-            return "No text to summarize."
-        # Use sentence transformer for summarization
-        sentence_model = get_sentence_model()
-        # Split text into sentences
-        sentences = text.split('.')
-        sentences = [s.strip() for s in sentences if s.strip()]
-        if not sentences:
-            return "No valid sentences found."
-        # Get sentence embeddings
-        sentence_embeddings = sentence_model.encode(sentences)
-        # Calculate sentence importance (using first sentence and average similarity)
-        first_sentence_embedding = sentence_embeddings[0]
-        similarities = util.pytorch_cos_sim(first_sentence_embedding, sentence_embeddings)[0]
-        avg_similarity = similarities.mean().item()
-        # Select important sentences
-        important_sentences = []
-        for i, (sentence, similarity) in enumerate(zip(sentences, similarities)):
-            if similarity > avg_similarity * 0.8:  # 80% of average similarity
-                important_sentences.append(sentence)
-                if len(important_sentences) >= 3:  # Limit to 3 sentences
-                    break
-        return '. '.join(important_sentences) + '.'
-    except Exception as e:
-        logger.error(f"Error summarizing text: {str(e)}")
-        return "Error generating summary."
 def check_document_consistency(document_text, property_data):
     try:
-        if sentence_model is None:
-            get_sentence_model()
         property_text = ' '.join([
             property_data.get(key, '') for key in [
                 'property_name', 'property_type', 'address', 'city',
@@ -708,7 +817,7 @@ def generate_property_summary(data):
         """
         # Use BART for summary generation
-        summarizer = get_sentence_model()
         # Generate initial summary
         summary_result = summarizer(property_context, max_length=150, min_length=50, do_sample=False)
@@ -762,6 +871,20 @@ def generate_property_summary(data):
         logger.error(f"Error generating property summary: {str(e)}")
         return "Could not generate summary."
 def classify_fraud(property_details, description):
     """
     Classify the risk of fraud in a property listing using zero-shot classification.
@@ -791,7 +914,7 @@ def classify_fraud(property_details, description):
         ]
         # Perform zero-shot classification
-        classifier = get_zero_shot_classifier()
         result = classifier(text_to_analyze, risk_categories, multi_label=True)
         # Process classification results
@@ -903,7 +1026,7 @@ def classify_fraud(property_details, description):
 def generate_trust_score(text, image_analysis, pdf_analysis):
     try:
-        classifier = get_zero_shot_classifier()
         aspects = [
             "complete information provided",
             "verified location",
@@ -1019,14 +1142,14 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
 def generate_suggestions(text, data=None):
     try:
-        classifier = get_zero_shot_classifier()
         # Create comprehensive context for analysis
         suggestion_context = text
         if data:
             suggestion_context += f"""
             Additional Context:
-            Property Type: {data.get('property_type', '')} "
             Location: {data.get('city', '')}, {data.get('state', '')}
             Size: {data.get('sq_ft', '')} sq.ft.
             Year Built: {data.get('year_built', '')}
@@ -1267,7 +1390,7 @@ def assess_text_quality(text):
                 'quality_metrics': {}
             }
-        classifier = get_zero_shot_classifier()
         # Enhanced quality categories with more specific indicators
         quality_categories = [
@@ -1811,124 +1934,396 @@ def perform_cross_validation(data):
 def analyze_location(data):
     try:
-        address = data.get('address', '')
-        city = data.get('city', '')
-        state = data.get('state', '')
-        country = data.get('country', 'India')
-        pincode = data.get('pincode', '')
-        zip_code = data.get('zip', pincode)  # Use pincode if zip is not provided
-        # Build location text
-        location_parts = [part for part in [address, city, state, country, zip_code] if part]
-        location_text = ', '.join(location_parts)
-        # Geocode location
-        coordinates = None
-        try:
-            location = geocoder.geocode(location_text, timeout=10)
-            if location:
-                coordinates = {
-                    'latitude': location.latitude,
-                    'longitude': location.longitude
-                }
-        except Exception as e:
-            logger.error(f"Error geocoding location: {str(e)}")
         return {
-            'address': address,
-            'city': city,
-            'state': state,
-            'country': country,
-            'pincode': pincode,
-            'zip': zip_code,
-            'coordinates': coordinates,
-            'completeness_score': calculate_location_completeness(data)
         }
     except Exception as e:
         logger.error(f"Error analyzing location: {str(e)}")
-        return {'error': str(e)}
 def calculate_location_completeness(data):
     try:
-        # Define weights for different fields
-        weights = {
-            'address': 0.2,
-            'city': 0.2,
-            'state': 0.2,
-            'country': 0.1,
-            'pincode': 0.15,
-            'zip': 0.15
         }
-        # Calculate completeness score
-        score = 0
-        for field, weight in weights.items():
-            if data.get(field):
-                score += weight
-        return min(100, score * 100)  # Convert to percentage, max 100
-    except Exception as e:
-        logger.error(f"Error calculating location completeness: {str(e)}")
-        return 0
-def analyze_price(data):
-    try:
-        # Handle empty or invalid price values
-        price_str = data.get('price', '0').strip()
-        market_value_str = data.get('market_value', price_str).strip()
-        area_str = data.get('area', '0').strip()
-        # Remove currency symbols and commas
-        price_str = price_str.replace('₹', '').replace('$', '').replace(',', '')
-        market_value_str = market_value_str.replace('₹', '').replace('$', '').replace(',', '')
-        area_str = area_str.replace(',', '')
-        # Convert to float with safe defaults
-        price = float(price_str) if price_str and price_str.replace('.', '').isdigit() else 0
-        market_value = float(market_value_str) if market_value_str and market_value_str.replace('.', '').isdigit() else price
-        area = float(area_str) if area_str and area_str.replace('.', '').isdigit() else 0
-        # Calculate price per sqft
-        price_per_sqft = price / area if area > 0 else 0
         return {
             'price': price,
-            'area': area,
             'price_per_sqft': price_per_sqft,
-            'market_value': market_value,
-            'price_comparison': {
-                'is_reasonable': price <= market_value * 1.1 if market_value > 0 else True,
-                'price_difference': market_value - price,
-                'price_difference_percentage': ((market_value - price) / market_value) * 100 if market_value > 0 else 0
-            }
         }
     except Exception as e:
         logger.error(f"Error analyzing price: {str(e)}")
-        return {'error': str(e)}
 def analyze_legal_details(legal_text):
     try:
-        # Initialize analysis results
-        analysis = {
-            'assessment': 'unknown',
                 'confidence': 0.0,
                 'completeness_score': 0,
-            'potential_issues': False,  # Changed to lowercase false
-            'reasoning': '',
-            'summary': '',
-            'legal_metrics': {
-                'completeness': 0.0,
-                'compliance': 0.0,
-                'documentation_quality': 0.0,
-                'risk_level': 0.0
-            },
                 'top_classifications': []
             }
-        if not legal_text or len(legal_text.strip()) < 5:
-            return analysis
-        classifier = get_zero_shot_classifier()
         # Enhanced legal categories with more specific indicators
         categories = [
@@ -1977,9 +2372,7 @@ def analyze_legal_details(legal_text):
                 })
         # Generate summary using BART
-        summarizer = get_sentence_model()
-        summary = summarizer(legal_text[:1000], max_length=150, min_length=50, do_sample=False)
-        initial_summary = summary[0]['summary_text']
         # Calculate legal metrics with weighted scoring
         legal_metrics = {
@@ -2048,36 +2441,26 @@ def analyze_legal_details(legal_text):
             (1 - legal_metrics['risk_level']) * 0.2
         ))
-        analysis['assessment'] = top_classifications[0]['classification'] if top_classifications else 'could not assess'
-        analysis['confidence'] = float(overall_confidence)
-        analysis['summary'] = initial_summary
-        analysis['completeness_score'] = int(completeness_score)
-        analysis['potential_issues'] = potential_issues
-        analysis['legal_metrics'] = legal_metrics
-        analysis['reasoning'] = '. '.join(reasoning_parts)
-        analysis['top_classifications'] = top_classifications
-        # Update potential_issues based on analysis
-        if potential_issues:
-            analysis['potential_issues'] = True  # Changed to lowercase true
-        return analysis
     except Exception as e:
         logger.error(f"Error analyzing legal details: {str(e)}")
         return {
-            'assessment': 'error',
             'confidence': 0.0,
             'completeness_score': 0,
-            'potential_issues': False,  # Changed to lowercase false
-            'reasoning': f'Error analyzing legal details: {str(e)}',
-            'summary': '',
-            'legal_metrics': {
-                'completeness': 0.0,
-                'compliance': 0.0,
-                'documentation_quality': 0.0,
-                'risk_level': 0.0
-            },
             'top_classifications': []
         }
@@ -2561,7 +2944,7 @@ def assess_image_quality(img):
 def check_if_property_related(text):
     try:
-        classifier = get_zero_shot_classifier()
         result = classifier(text[:1000], ["property-related", "non-property-related"])
         is_related = result['labels'][0] == "property-related"
         return {
@@ -2575,35 +2958,6 @@ def check_if_property_related(text):
             'confidence': 0.0
         }
-# Optimize model loading
-def load_models_in_background():
-    """Load models in background to avoid blocking the main thread"""
-    def load_models():
-        try:
-            # Load models in parallel
-            with ThreadPoolExecutor(max_workers=4) as executor:
-                futures = [
-                    executor.submit(get_clip_model),
-                    executor.submit(get_sentence_model),
-                    executor.submit(get_spacy_model),
-                    executor.submit(get_zero_shot_classifier)
-                ]
-                # Wait for all models to load
-                for future in concurrent.futures.as_completed(futures):
-                    try:
-                        future.result()
-                    except Exception as e:
-                        logger.error(f"Error loading model: {str(e)}")
-        except Exception as e:
-            logger.error(f"Error in background model loading: {str(e)}")
-    # Start model loading in background
-    threading.Thread(target=load_models, daemon=True).start()
-# Start model loading when the app starts
-load_models_in_background()
 if __name__ == '__main__':
     # Run Flask app.
     # The Werkzeug debugger allows arbitrary code execution from the browser,
     # so it must never be enabled unconditionally on a server bound to all
     # interfaces. Debug mode is opt-in through the FLASK_DEBUG environment
     # variable ("1", "true" or "yes").
     import os
     debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
     app.run(host='0.0.0.0', port=8000, debug=debug_enabled, use_reloader=False)

 import threading
 import asyncio
 import concurrent.futures
 app = Flask(__name__)
+CORS(app)  # Enable CORS for frontend
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
+        logging.FileHandler('app.log'),
+        logging.StreamHandler()
     ]
 )
 logger = logging.getLogger(__name__)
 # Initialize geocoder
 geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
 # Cache models
 @lru_cache(maxsize=10)
 def load_model(task, model_name):
         logger.error(f"Error loading model {model_name}: {str(e)}")
         raise
# Initialize CLIP model.
# Both handles are bound to None up front so that they always exist as module
# globals, even when loading fails (mirrors sentence_model / nlp below).
clip_processor = None
clip_model = None
try:
    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    has_clip_model = True
    logger.info("CLIP model loaded successfully")
except Exception as e:
    logger.error(f"Error loading CLIP model: {str(e)}")
    # Never leave a half-initialised pair behind (processor loaded, model not).
    clip_processor = None
    clip_model = None
    has_clip_model = False
# Initialize sentence transformer; None signals "unavailable" to callers.
try:
    sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    logger.info("Sentence transformer loaded successfully")
except Exception as e:
    logger.error(f"Error loading sentence transformer: {str(e)}")
    sentence_model = None
# Initialize spaCy; None signals "unavailable" to callers.
try:
    nlp = spacy.load('en_core_web_md')
    logger.info("spaCy model loaded successfully")
except Exception as e:
    logger.error(f"Error loading spaCy model: {str(e)}")
    nlp = None
 def make_json_serializable(obj):
     try:
             return obj.item() if hasattr(obj, 'item') else float(obj)
         elif isinstance(obj, np.ndarray):
             return obj.tolist()
         else:
             return str(obj)
     except Exception as e:
         }), 500
 def calculate_final_verdict(results):
     """
     Calculate a comprehensive final verdict based on all analysis results.

     Combines the verification scores, fraud indicators and quality
     assessments found in ``results`` to decide whether a property listing is
     legitimate, suspicious, or fraudulent.

     Args:
         results: Dict of analysis outputs. The keys read here are
             'trust_score', 'fraud_classification', 'quality_assessment',
             'specs_verification', 'cross_validation', 'location_analysis',
             'price_analysis', 'legal_analysis', 'document_analysis' and
             'image_analysis'; missing keys fall back to empty/zero defaults.

     Returns:
         Dict with 'status' ('legitimate', 'suspicious', 'fraudulent' or
         'error'), 'confidence', 'score', 'reasons', 'critical_issues',
         'warnings' and 'recommendations'. Any exception is logged and turned
         into the 'error' verdict instead of propagating.
     """
     try:
         # Initialize verdict components
         verdict = {
             'status': 'unknown',
             'confidence': 0.0,
             'score': 0.0,
             'reasons': [],
             'critical_issues': [],
             'warnings': [],
             'recommendations': []
         }
         # Extract key components from results
         trust_score = results.get('trust_score', {}).get('score', 0)
         fraud_classification = results.get('fraud_classification', {})
         quality_assessment = results.get('quality_assessment', {})
         specs_verification = results.get('specs_verification', {})
         cross_validation = results.get('cross_validation', [])
         location_analysis = results.get('location_analysis', {})
         price_analysis = results.get('price_analysis', {})
         legal_analysis = results.get('legal_analysis', {})
         document_analysis = results.get('document_analysis', {})
         image_analysis = results.get('image_analysis', {})
         quality_score = quality_assessment.get('score', 0)
         specs_score = specs_verification.get('verification_score', 0)
         pdf_count = document_analysis.get('pdf_count', 0)
         image_count = image_analysis.get('image_count', 0)
         # Calculate component scores (0-100)
         component_scores = {
             'trust': trust_score,
             'fraud': 100 - (fraud_classification.get('alert_score', 0) * 100),
             'quality': quality_score,
             'specs': specs_score,
             'location': location_analysis.get('completeness_score', 0),
             'price': price_analysis.get('confidence', 0) * 100 if price_analysis.get('has_price') else 0,
             'legal': legal_analysis.get('completeness_score', 0),
             # Three documents / five images are treated as full marks.
             'documents': min(100, (pdf_count / 3) * 100) if pdf_count else 0,
             'images': min(100, (image_count / 5) * 100) if image_count else 0
         }
         # Calculate weighted final score with adjusted weights
         weights = {
             'trust': 0.20,
             'fraud': 0.25,  # Increased weight for fraud detection
             'quality': 0.15,
             'specs': 0.10,
             'location': 0.10,
             'price': 0.05,
             'legal': 0.05,
             'documents': 0.05,
             'images': 0.05
         }
         verdict['score'] = sum(
             score * weights.get(component, 0)
             for component, score in component_scores.items()
         )
         # Determine verdict status based on multiple factors
         fraud_level = fraud_classification.get('alert_level', 'minimal')
         high_risk_indicators = len(fraud_classification.get('high_risk', []))
         critical_issues = []
         warnings = []
         # Check for critical issues
         if fraud_level in ['critical', 'high']:
             critical_issues.append(f"High fraud risk detected: {fraud_level} alert level")
         if trust_score < 40:
             critical_issues.append(f"Very low trust score: {trust_score}%")
         if quality_score < 30:
             critical_issues.append(f"Very low content quality: {quality_score}%")
         if specs_score < 40:
             critical_issues.append(f"Property specifications verification failed: {specs_score}%")
         # Check for warnings
         if fraud_level == 'medium':
             warnings.append(f"Medium fraud risk detected: {fraud_level} alert level")
         if trust_score < 60:
             warnings.append(f"Low trust score: {trust_score}%")
         if quality_score < 60:
             warnings.append(f"Low content quality: {quality_score}%")
         if specs_score < 70:
             warnings.append(f"Property specifications have issues: {specs_score}%")
         # Check cross-validation results
         for check in cross_validation:
             if check.get('status') in ['inconsistent', 'invalid', 'suspicious', 'no_match']:
                 warnings.append(f"Cross-validation issue: {check.get('message', 'Unknown issue')}")
         # Check for missing critical information
         missing_critical = []
         if location_analysis.get('completeness_score', 0) <= 70:
             missing_critical.append("Location information is incomplete")
         if not price_analysis.get('has_price', False):
             missing_critical.append("Price information is missing")
         if legal_analysis.get('completeness_score', 0) <= 70:
             missing_critical.append("Legal information is incomplete")
         if pdf_count == 0:
             missing_critical.append("No supporting documents provided")
         if image_count == 0:
             missing_critical.append("No property images provided")
         if missing_critical:
             warnings.append(f"Missing critical information: {', '.join(missing_critical)}")
         # Enhanced verdict determination with more strict criteria
         if critical_issues or (fraud_level in ['critical', 'high'] and trust_score < 50) or high_risk_indicators > 0:
             verdict['status'] = 'fraudulent'
             verdict['confidence'] = min(100, max(70, 100 - (trust_score * 0.5)))
         elif warnings or (fraud_level == 'medium' and trust_score < 70) or specs_score < 60:
             verdict['status'] = 'suspicious'
             verdict['confidence'] = min(100, max(50, trust_score * 0.8))
         else:
             verdict['status'] = 'legitimate'
             verdict['confidence'] = min(100, max(70, trust_score * 0.9))
         # Add reasons to verdict
         verdict['critical_issues'] = critical_issues
         verdict['warnings'] = warnings
         # Recommendations follow the verdict status itself, so a listing that
         # is 'fraudulent' purely because of high-risk indicators is never told
         # to "proceed with standard due diligence".
         if verdict['status'] == 'fraudulent':
             verdict['recommendations'].append("Do not proceed with this property listing")
             verdict['recommendations'].append("Report this listing to the platform")
         elif verdict['status'] == 'suspicious':
             verdict['recommendations'].append("Proceed with extreme caution")
             verdict['recommendations'].append("Request additional verification documents")
             verdict['recommendations'].append("Verify all information with independent sources")
         else:
             verdict['recommendations'].append("Proceed with standard due diligence")
             verdict['recommendations'].append("Verify final details before transaction")
         # Add specific recommendations based on missing information
         for missing in missing_critical:
             verdict['recommendations'].append(f"Request {missing.lower()}")
         return verdict
     except Exception as e:
         logger.error(f"Error calculating final verdict: {str(e)}")
         return {
             'status': 'error',
             'confidence': 0.0,
             'score': 0.0,
             'reasons': [f"Error calculating verdict: {str(e)}"],
             'critical_issues': [],
             'warnings': [],
             'recommendations': ["Unable to determine property status due to an error"]
         }
 def _encode_image_as_jpeg(img):
     """Return the PIL image ``img`` re-encoded as a base64 JPEG string."""
     # JPEG cannot store alpha or palette modes (e.g. RGBA / P PNG uploads);
     # saving those directly raises, so convert a copy to RGB first.
     jpeg_ready = img if img.mode in ('L', 'RGB', 'CMYK') else img.convert('RGB')
     buffered = io.BytesIO()
     jpeg_ready.save(buffered, format="JPEG")
     return base64.b64encode(buffered.getvalue()).decode('utf-8')

 @app.route('/verify', methods=['POST'])
 def verify_property():
     """
     Handle POST /verify: analyse a submitted property listing.

     Reads the listing fields from the multipart form, optional 'images'
     (.jpg/.jpeg/.png) and 'documents' (.pdf) uploads, runs every analysis in
     a thread pool and returns the combined report as JSON.

     Returns:
         A JSON response with the full report on success, HTTP 400 when no
         data or a required field is missing, and HTTP 500 on any unexpected
         error.
     """
     try:
         if not request.form and not request.files:
             logger.warning("No form data or files provided")
             return jsonify({
                 'error': 'No data provided',
                 'status': 'error'
             }), 400
         # Extract form data (every field is a stripped string; only
         # 'country' has a non-empty default).
         form_fields = (
             'property_name', 'property_type', 'status', 'description',
             'address', 'city', 'state', 'country', 'zip', 'latitude',
             'longitude', 'bedrooms', 'bathrooms', 'total_rooms',
             'year_built', 'parking', 'sq_ft', 'market_value', 'amenities',
             'nearby_landmarks', 'legal_details'
         )
         data = {
             field: request.form.get(field, 'India' if field == 'country' else '').strip()
             for field in form_fields
         }
         # Validate required fields
         required_fields = ['property_name', 'property_type', 'address', 'city', 'state']
         missing_fields = [field for field in required_fields if not data[field]]
         if missing_fields:
             logger.warning(f"Missing required fields: {', '.join(missing_fields)}")
             return jsonify({
                 'error': f"Missing required fields: {', '.join(missing_fields)}",
                 'status': 'error'
             }), 400
         # Process images
         images = []
         image_analysis = []
         if 'images' in request.files:
             # Get unique image files by filename to prevent duplicates
             image_files = {}
             for img_file in request.files.getlist('images'):
                 if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                     image_files[img_file.filename] = img_file
             # Process unique images
             for img_file in image_files.values():
                 try:
                     img = Image.open(img_file)
                     images.append(_encode_image_as_jpeg(img))
                     image_analysis.append(analyze_image(img))
                 except Exception as e:
                     logger.error(f"Error processing image {img_file.filename}: {str(e)}")
                     image_analysis.append({'error': str(e), 'is_property_related': False})
         # Process PDFs
         pdf_texts = []
         pdf_analysis = []
         if 'documents' in request.files:
             # Get unique PDF files by filename to prevent duplicates
             pdf_files = {}
             for pdf_file in request.files.getlist('documents'):
                 if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
                     pdf_files[pdf_file.filename] = pdf_file
             # Process unique PDFs
             for pdf_file in pdf_files.values():
                 try:
                     pdf_text = extract_pdf_text(pdf_file)
                     pdf_texts.append({
                         'filename': pdf_file.filename,
                         'text': pdf_text
                     })
                     pdf_analysis.append(analyze_pdf_content(pdf_text, data))
                 except Exception as e:
                     logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
                     pdf_analysis.append({'error': str(e)})
         # Create consolidated text for analysis
         consolidated_text = f"""
         Property Name: {data['property_name']}
         Property Type: {data['property_type']}
         Status: {data['status']}
         Description: {data['description']}
         Location: {data['address']}, {data['city']}, {data['state']}, {data['country']}, {data['zip']}
         Coordinates: Lat {data['latitude']}, Long {data['longitude']}
         Specifications: {data['bedrooms']} bedrooms, {data['bathrooms']} bathrooms, {data['total_rooms']} total rooms
         Year Built: {data['year_built']}
         Parking: {data['parking']}
         Size: {data['sq_ft']} sq. ft.
         Market Value: ₹{data['market_value']}
         Amenities: {data['amenities']}
         Nearby Landmarks: {data['nearby_landmarks']}
         Legal Details: {data['legal_details']}
         """
         # Process description translation if needed
         try:
             description = data['description']
             if description and len(description) > 10:
                 text_language = detect(description)
                 if text_language != 'en':
                     translated_description = GoogleTranslator(source=text_language, target='en').translate(description)
                     data['description_translated'] = translated_description
                 else:
                     data['description_translated'] = description
             else:
                 data['description_translated'] = description
         except Exception as e:
             logger.error(f"Error in language detection/translation: {str(e)}")
             data['description_translated'] = data['description']
         # Run all analyses in parallel. Futures are submitted straight to the
         # thread pool: no per-request event loop has to be created, installed
         # on the thread and closed, so nothing leaks if an analysis raises.
         analysis_jobs = [
             (generate_property_summary, data),
             (classify_fraud, consolidated_text, data),
             (generate_trust_score, consolidated_text, image_analysis, pdf_analysis),
             (generate_suggestions, consolidated_text, data),
             (assess_text_quality, data['description_translated']),
             (verify_address, data),
             (perform_cross_validation, data),
             (analyze_location, data),
             (analyze_price, data),
             (analyze_legal_details, data['legal_details']),
             (verify_property_specs, data),
             (analyze_market_value, data)
         ]
         with concurrent.futures.ThreadPoolExecutor() as executor:
             futures = [executor.submit(func, *args) for func, *args in analysis_jobs]
             # Results are collected in submission order, matching the unpack below.
             analysis_results = [future.result() for future in futures]
         # Unpack results
         summary, fraud_classification, (trust_score, trust_reasoning), suggestions, quality_assessment, \
         address_verification, cross_validation, location_analysis, price_analysis, legal_analysis, \
         specs_verification, market_analysis = analysis_results
         # Prepare response
         document_analysis = {
             'pdf_count': len(pdf_texts),
             'pdf_texts': pdf_texts,
             'pdf_analysis': pdf_analysis
         }
         image_results = {
             'image_count': len(images),
             'image_analysis': image_analysis
         }
         report_id = str(uuid.uuid4())
         # Create results dictionary
         results = {
             'report_id': report_id,
             'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
             'summary': summary,
             'fraud_classification': fraud_classification,
             'trust_score': {
                 'score': trust_score,
                 'reasoning': trust_reasoning
             },
             'suggestions': suggestions,
             'quality_assessment': quality_assessment,
             'address_verification': address_verification,
             'cross_validation': cross_validation,
             'location_analysis': location_analysis,
             'price_analysis': price_analysis,
             'legal_analysis': legal_analysis,
             'document_analysis': document_analysis,
             'image_analysis': image_results,
             'specs_verification': specs_verification,
             'market_analysis': market_analysis,
             'images': images
         }
         # Calculate final verdict
         final_verdict = calculate_final_verdict(results)
         results['final_verdict'] = final_verdict
         return jsonify(make_json_serializable(results))
     except Exception as e:
         logger.error(f"Error in verify_property: {str(e)}")
         # NOTE(review): 'details' echoes the raw exception text to the client;
         # kept for frontend compatibility, consider removing in production.
         return jsonify({
             'error': 'Server error occurred. Please try again later.',
             'status': 'error',
             'details': str(e)
         }), 500
 def extract_pdf_text(pdf_file):
     try:
 def analyze_image(image):
     try:
+        if has_clip_model:
             img_rgb = image.convert('RGB')
             inputs = clip_processor(
                 text=[
                 'is_ai_generated': is_ai_generated,
                 'authenticity_score': 0.95 if not is_ai_generated else 0.60
             }
+        else:
+            logger.warning("CLIP model unavailable")
+            return {
+                'is_property_related': False,
+                'property_confidence': 0.0,
+                'top_predictions': [],
+                'image_quality': assess_image_quality(image),
+                'is_ai_generated': False,
+                'authenticity_score': 0.5
+            }
     except Exception as e:
         logger.error(f"Error analyzing image: {str(e)}")
         return {
 def _unverified_pdf_result(summary, error=None):
     """Build the zero-score fallback result shared by the empty and error paths."""
     result = {
         'document_type': {'classification': 'unknown', 'confidence': 0.0},
         'authenticity': {'assessment': 'could not verify', 'confidence': 0.0},
         'key_info': {},
         'consistency_score': 0.0,
         'is_property_related': False,
         'summary': summary,
         'has_signatures': False,
         'has_dates': False,
         'verification_score': 0.0
     }
     if error is not None:
         result['error'] = error
     return result

 def analyze_pdf_content(document_text, property_data):
     """
     Classify and score the text extracted from an uploaded PDF.

     Args:
         document_text: Text extracted from the document; empty/None yields
             the 'Empty document' fallback result.
         property_data: Listing fields dict; 'property_type', 'city' and
             'property_name' are read to give the classifier context.

     Returns:
         Dict with 'document_type', 'authenticity', 'key_info',
         'consistency_score', 'is_property_related', 'summary',
         'has_signatures', 'has_dates' and 'verification_score'. On failure
         the fallback result is returned with an extra 'error' key.
     """
     try:
         if not document_text:
             return _unverified_pdf_result('Empty document')
         # Use a more sophisticated model for document classification
         classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         # Enhanced document types with more specific categories
         doc_types = [
             "property deed", "sales agreement", "mortgage document",
             "property tax record", "title document", "khata certificate",
             "encumbrance certificate", "lease agreement", "rental agreement",
             "property registration document", "building permit", "other document"
         ]
         # Analyze document type with context
         doc_context = f"{document_text[:1000]} property_type:{property_data.get('property_type', '')} location:{property_data.get('city', '')}"
         doc_result = classifier(doc_context, doc_types)
         doc_type = doc_result['labels'][0]
         doc_confidence = doc_result['scores'][0]
         # Enhanced authenticity check with multiple aspects
         authentic_label = "authentic legal document"
         authenticity_aspects = [
             authentic_label,
             "questionable document",
             "forged document",
             "template document",
             "official document"
         ]
         authenticity_result = classifier(document_text[:1000], authenticity_aspects)
         authenticity = "likely authentic" if authenticity_result['labels'][0] == authentic_label else "questionable"
         # Confidence in the reported assessment (score of the top label).
         authenticity_confidence = authenticity_result['scores'][0]
         # For scoring, use the score given to the *authentic* label, so a
         # confident "forged document" prediction lowers the verification
         # score instead of raising it.
         label_scores = dict(zip(authenticity_result['labels'], authenticity_result['scores']))
         authentic_score = label_scores.get(authentic_label, 0.0)
         # Extract key information using NLP
         key_info = extract_document_key_info(document_text)
         # Enhanced consistency check
         consistency_score = check_document_consistency(document_text, property_data)
         # Property relation check with context
         property_context = f"{document_text[:1000]} property:{property_data.get('property_name', '')} type:{property_data.get('property_type', '')}"
         is_property_related = check_if_property_related(property_context)['is_related']
         # Generate summary using BART
         summary = summarize_text(document_text[:2000])
         # Enhanced signature and date detection
         has_signatures = bool(re.search(r'(?:sign|signature|signed|witness|notary|authorized).{0,50}(?:by|of|for)', document_text.lower()))
         has_dates = bool(re.search(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}', document_text))
         # Calculate verification score with weighted components
         verification_weights = {
             'doc_type': 0.3,
             'authenticity': 0.3,
             'consistency': 0.2,
             'property_relation': 0.1,
             'signatures_dates': 0.1
         }
         verification_score = (
             doc_confidence * verification_weights['doc_type'] +
             authentic_score * verification_weights['authenticity'] +
             consistency_score * verification_weights['consistency'] +
             float(is_property_related) * verification_weights['property_relation'] +
             float(has_signatures and has_dates) * verification_weights['signatures_dates']
         )
         return {
             'document_type': {'classification': doc_type, 'confidence': float(doc_confidence)},
             'authenticity': {'assessment': authenticity, 'confidence': float(authenticity_confidence)},
             'key_info': key_info,
             'consistency_score': float(consistency_score),
             'is_property_related': is_property_related,
             'summary': summary,
             'has_signatures': has_signatures,
             'has_dates': has_dates,
             'verification_score': float(verification_score)
         }
     except Exception as e:
         logger.error(f"Error analyzing PDF content: {str(e)}")
         return _unverified_pdf_result('Could not analyze document', error=str(e))
 def check_document_consistency(document_text, property_data):
     try:
+        if not sentence_model:
+            logger.warning("Sentence model unavailable")
+            return 0.5
         property_text = ' '.join([
             property_data.get(key, '') for key in [
                 'property_name', 'property_type', 'address', 'city',
         """
         # Use BART for summary generation
+        summarizer = load_model("summarization", "facebook/bart-large-cnn")
         # Generate initial summary
         summary_result = summarizer(property_context, max_length=150, min_length=50, do_sample=False)
         logger.error(f"Error generating property summary: {str(e)}")
         return "Could not generate summary."
def summarize_text(text):
    """
    Summarize ``text`` with the BART summarization model.

    Only the first 2000 characters are summarized, and the length bounds are
    derived from that same truncated text so they match what the model sees.

    Args:
        text: Text to summarize.

    Returns:
        The generated summary; "No text to summarize." when ``text`` is empty
        or shorter than 10 non-blank characters; on any failure, the first
        200 characters of ``text`` (with "..." appended when truncated).
    """
    try:
        if not text or len(text.strip()) < 10:
            return "No text to summarize."
        summarizer = load_model("summarization", "facebook/bart-large-cnn")
        excerpt = text[:2000]
        input_length = len(excerpt.split())
        max_length = max(50, min(150, input_length // 2))
        # Never ask for a minimum above the maximum (happened for long,
        # word-dense inputs where input_length // 4 exceeded 150).
        min_length = min(max(20, input_length // 4), max_length)
        summary = summarizer(excerpt, max_length=max_length, min_length=min_length, do_sample=False)
        return summary[0]['summary_text']
    except Exception as e:
        logger.error(f"Error summarizing text: {str(e)}")
        return text[:200] + "..." if len(text) > 200 else text
 def classify_fraud(property_details, description):
     """
     Classify the risk of fraud in a property listing using zero-shot classification.
         ]
         # Perform zero-shot classification
+        classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
         result = classifier(text_to_analyze, risk_categories, multi_label=True)
         # Process classification results
 def generate_trust_score(text, image_analysis, pdf_analysis):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         aspects = [
             "complete information provided",
             "verified location",
 def generate_suggestions(text, data=None):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         # Create comprehensive context for analysis
         suggestion_context = text
         if data:
             suggestion_context += f"""
             Additional Context:
+            Property Type: {data.get('property_type', '')}
             Location: {data.get('city', '')}, {data.get('state', '')}
             Size: {data.get('sq_ft', '')} sq.ft.
             Year Built: {data.get('year_built', '')}
                 'quality_metrics': {}
             }
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         # Enhanced quality categories with more specific indicators
         quality_categories = [
 def analyze_location(data):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
+        # Create a detailed location text for analysis
+        location_text = ' '.join(filter(None, [
+            data['address'], data['city'], data['state'], data['country'],
+            data['zip'], f"Lat: {data['latitude']}", f"Long: {data['longitude']}",
+            data['nearby_landmarks']
+        ]))
+        # Classify location completeness
+        categories = ["complete", "partial", "minimal", "missing"]
+        result = classifier(location_text, categories)
+        # Verify location quality
+        location_quality = "unknown"
+        if data['city'] and data['state']:
+            for attempt in range(3):
+                try:
+                    location = geocoder.geocode(f"{data['city']}, {data['state']}, India")
+                    if location:
+                        location_quality = "verified"
+                        break
+                    time.sleep(1)
+                except:
+                    time.sleep(1)
+            else:
+                location_quality = "unverified"
+        # Check coordinates
+        coord_check = "missing"
+        if data['latitude'] and data['longitude']:
+            try:
+                lat, lng = float(data['latitude']), float(data['longitude'])
+                if 6.5 <= lat <= 37.5 and 68.0 <= lng <= 97.5:
+                    coord_check = "in_india"
+                    # Further validate coordinates against known Indian cities
+                    if any(city in data['city'].lower() for city in ["mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune"]):
+                        coord_check = "in_metro_city"
+                else:
+                    coord_check = "outside_india"
+            except:
+                coord_check = "invalid"
+        # Calculate location completeness with weighted scoring
+        completeness = calculate_location_completeness(data)
+        # Analyze landmarks
+        landmarks_analysis = {
+            'provided': bool(data['nearby_landmarks']),
+            'count': len(data['nearby_landmarks'].split(',')) if data['nearby_landmarks'] else 0,
+            'types': []
+        }
+        if data['nearby_landmarks']:
+            landmark_types = {
+                'transport': ['station', 'metro', 'bus', 'railway', 'airport'],
+                'education': ['school', 'college', 'university', 'institute'],
+                'healthcare': ['hospital', 'clinic', 'medical'],
+                'shopping': ['mall', 'market', 'shop', 'store'],
+                'entertainment': ['park', 'garden', 'theater', 'cinema'],
+                'business': ['office', 'business', 'corporate']
+            }
+            landmarks = data['nearby_landmarks'].lower().split(',')
+            for landmark in landmarks:
+                for type_name, keywords in landmark_types.items():
+                    if any(keyword in landmark for keyword in keywords):
+                        if type_name not in landmarks_analysis['types']:
+                            landmarks_analysis['types'].append(type_name)
+        # Determine location assessment
+        assessment = "complete" if completeness >= 80 else "partial" if completeness >= 50 else "minimal"
+        # Add city tier information
+        city_tier = "unknown"
+        if data['city']:
+            city_lower = data['city'].lower()
+            if any(city in city_lower for city in ["mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune"]):
+                city_tier = "metro"
+            elif any(city in city_lower for city in ["ahmedabad", "jaipur", "surat", "lucknow", "kanpur", "nagpur", "indore", "thane", "bhopal", "visakhapatnam"]):
+                city_tier = "tier2"
+            else:
+                city_tier = "tier3"
         return {
+            'assessment': assessment,
+            'confidence': float(result['scores'][0]),
+            'coordinates_check': coord_check,
+            'landmarks_analysis': landmarks_analysis,
+            'completeness_score': completeness,
+            'location_quality': location_quality,
+            'city_tier': city_tier,
+            'formatted_address': f"{data['address']}, {data['city']}, {data['state']}, India - {data['zip']}",
+            'verification_status': "verified" if location_quality == "verified" and coord_check in ["in_india", "in_metro_city"] else "unverified"
         }
     except Exception as e:
         logger.error(f"Error analyzing location: {str(e)}")
+        return {
+            'assessment': 'error',
+            'confidence': 0.0,
+            'coordinates_check': 'error',
+            'landmarks_analysis': {'provided': False, 'count': 0, 'types': []},
+            'completeness_score': 0,
+            'location_quality': 'error',
+            'city_tier': 'unknown',
+            'formatted_address': '',
+            'verification_status': 'error'
+        }
 def calculate_location_completeness(data):
     """Return a 0-100 completeness score for a listing's location fields.

     Every field listed in ``weights`` that holds a truthy value contributes
     a fixed number of percentage points; the points total 100 when all of
     the fields are filled in.

     Args:
         data: Mapping of listing fields (anything with ``.get``). Only the
             keys named in ``weights`` are read. A key that is absent is
             treated the same as an empty value instead of raising
             ``KeyError``.

     Returns:
         int: Sum of the points of the populated fields, from 0 to 100.
     """
     # Points are whole percentages rather than fractions (0.25, 0.20, ...):
     # accumulating binary floats can leave the total a hair under the
     # intended value, which int() truncation would then round down.
     weights = {
         'address': 25,
         'city': 20,
         'state': 15,
         'country': 5,
         'zip': 10,
         'latitude': 10,
         'longitude': 10,
         'nearby_landmarks': 5,
     }
     return sum(points for field, points in weights.items() if data.get(field))
def _empty_price_result(assessment, location_assessment):
    """Build the zeroed result returned when the price cannot be analysed.

    Args:
        assessment: Value stored under ``'assessment'``.
        location_assessment: Value stored under ``'location_price_assessment'``.

    Returns:
        dict: The same keys as a successful ``analyze_price`` result, with
        zero/empty values and ``'has_price'`` set to ``False``.
    """
    return {
        'assessment': assessment,
        'confidence': 0.0,
        'price': 0,
        'formatted_price': '₹0',
        'price_per_sqft': 0,
        'formatted_price_per_sqft': '₹0',
        'price_range': 'unknown',
        'location_price_assessment': location_assessment,
        'has_price': False,
        'market_trends': {},
        'price_factors': {},
        'risk_indicators': [],
        'top_classifications': []
    }


def _assess_location_price(city, price_per_sqft):
    """Compare a price per sq.ft. with the hard-coded band for the city's tier.

    Args:
        city: City name as supplied in the listing (may be empty or ``None``).
        price_per_sqft: Price per square foot; 0 when it could not be computed.

    Returns:
        tuple: ``(location_assessment, market_trends)``. When either input is
        missing the pair is ``("unknown", {})``.
    """
    if not (city and price_per_sqft):
        return "unknown", {}

    metro_cities = ["mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune"]
    city_lower = city.lower()
    # Define price ranges for different city tiers
    if any(name in city_lower for name in metro_cities):
        tier, low, high, market_avg = 'metro', 5000, 30000, 15000
    else:
        tier, low, high, market_avg = 'non-metro', 1500, 15000, 7500

    market_trends = {
        'city_tier': tier,
        'avg_price_range': {
            'min': low,
            'max': high,
            'trend': 'stable'
        },
        'price_per_sqft': {
            'current': price_per_sqft,
            'market_avg': market_avg,
            'deviation': abs(price_per_sqft - market_avg) / market_avg * 100
        }
    }
    location_assessment = (
        "reasonable" if low <= price_per_sqft <= high else
        "suspiciously low" if price_per_sqft < low else
        "suspiciously high"
    )
    return location_assessment, market_trends


def analyze_price(data):
    """Assess a listing's asking price with a zero-shot classifier and heuristics.

    Args:
        data: Mapping of listing fields. ``'market_value'`` and ``'sq_ft'`` are
            indexed directly and must be present. These keys are read with
            ``.get`` and may be absent: ``'property_type'``, ``'city'``,
            ``'state'``, ``'status'``, ``'year_built'``, ``'bedrooms'``,
            ``'bathrooms'``, ``'amenities'``.

    Returns:
        dict: Always contains ``assessment``, ``confidence``, ``price``,
        ``formatted_price``, ``price_per_sqft``, ``formatted_price_per_sqft``,
        ``price_range``, ``location_price_assessment``, ``has_price``,
        ``market_trends``, ``price_factors``, ``risk_indicators`` and
        ``top_classifications``. ``assessment`` is ``'no price'`` when the
        price is empty or zero and ``'error'`` when any exception occurred;
        in both cases ``has_price`` is ``False``. Exceptions are logged, not
        raised.
    """
    try:
        # Strip currency symbols and thousands separators. An empty/None
        # value means "no price" rather than an error.
        raw_price = data['market_value']
        price_str = (
            str(raw_price).replace('$', '').replace('₹', '').replace(',', '').strip()
            if raw_price else ''
        )
        price = float(price_str) if price_str else 0

        # Keep only digits and dots from the area text; text with no usable
        # number (e.g. "N/A") counts as an unknown area, not a failure.
        raw_area = data['sq_ft']
        try:
            sq_ft = float(re.sub(r'[^\d.]', '', str(raw_area))) if raw_area else 0
        except ValueError:
            sq_ft = 0
        price_per_sqft = price / sq_ft if sq_ft else 0

        if not price:
            return _empty_price_result('no price', 'cannot assess')

        # Use a more sophisticated model for price analysis
        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")

        # Create a detailed context for price analysis
        price_context = f"""
        Property Type: {data.get('property_type', '')}
        Location: {data.get('city', '')}, {data.get('state', '')}
        Size: {sq_ft} sq.ft.
        Price: ₹{price:,.2f}
        Price per sq.ft.: ₹{price_per_sqft:,.2f}
        Property Status: {data.get('status', '')}
        Year Built: {data.get('year_built', '')}
        Bedrooms: {data.get('bedrooms', '')}
        Bathrooms: {data.get('bathrooms', '')}
        Amenities: {data.get('amenities', '')}
        """

        # Enhanced price categories with more specific indicators
        price_categories = [
            "reasonable market price",
            "suspiciously low price",
            "suspiciously high price",
            "average market price",
            "luxury property price",
            "budget property price",
            "premium property price",
            "mid-range property price",
            "overpriced for location",
            "underpriced for location",
            "price matches amenities",
            "price matches property age",
            "price matches location value",
            "price matches property condition",
            "price matches market trends"
        ]

        # Analyze price with multiple aspects
        price_result = classifier(price_context, price_categories, multi_label=True)

        # Keep the first five labels whose score clears the 0.25 threshold
        top_classifications = [
            {'classification': label, 'confidence': float(score)}
            for label, score in zip(price_result['labels'][:5], price_result['scores'][:5])
            if score > 0.25
        ]

        # Determine price range from the primary classification label
        price_range = 'unknown'
        if top_classifications:
            primary_class = top_classifications[0]['classification']
            range_keywords = (
                ('luxury', 'luxury'),
                ('premium', 'premium'),
                ('mid-range', 'mid_range'),
                ('budget', 'budget'),
            )
            for keyword, range_name in range_keywords:
                if keyword in primary_class:
                    price_range = range_name
                    break

        # Location-specific assessment; stays "unknown" when the city or the
        # price per sq.ft. is unavailable instead of failing the analysis.
        location_assessment, market_trends = _assess_location_price(
            data.get('city'), price_per_sqft
        )

        # Enhanced price analysis factors
        price_factors = {}
        risk_indicators = []

        # Property age factor
        try:
            year_built = int(data.get('year_built', 0))
        except (TypeError, ValueError):
            price_factors['age_factor'] = {'error': 'Invalid year built'}
        else:
            property_age = datetime.now().year - year_built
            # A missing year defaults to 0; skip it rather than treating the
            # property as roughly two thousand years old.
            if year_built > 0 and property_age > 0:
                depreciation_factor = max(0.5, 1 - (property_age * 0.01))  # 1% depreciation per year, min 50%
                price_factors['age_factor'] = {
                    'property_age': property_age,
                    'depreciation_factor': depreciation_factor,
                    'impact': 'high' if property_age > 30 else 'medium' if property_age > 15 else 'low'
                }

        # Size factor
        if sq_ft > 0:
            price_factors['size_factor'] = {
                'size': sq_ft,
                'price_per_sqft': price_per_sqft,
                'efficiency': 'high' if 800 <= sq_ft <= 2000 else 'medium' if 500 <= sq_ft <= 3000 else 'low'
            }
            # Add risk indicators based on size
            if sq_ft < 300:
                risk_indicators.append('Unusually small property size')
            elif sq_ft > 10000:
                risk_indicators.append('Unusually large property size')

        # Amenities factor (blank entries such as a trailing comma are ignored)
        if data.get('amenities'):
            amenities_list = [a.strip() for a in data['amenities'].split(',') if a.strip()]
            amenities_score = min(1.0, len(amenities_list) * 0.1)  # 10% per amenity, max 100%
            price_factors['amenities_factor'] = {
                'count': len(amenities_list),
                'score': amenities_score,
                'impact': 'high' if amenities_score > 0.7 else 'medium' if amenities_score > 0.4 else 'low'
            }

        # Calculate overall confidence with weighted factors
        confidence_weights = {
            'primary_classification': 0.3,
            'location_assessment': 0.25,
            'age_factor': 0.2,
            'size_factor': 0.15,
            'amenities_factor': 0.1
        }
        confidence_scores = []

        # Primary classification confidence
        if top_classifications:
            confidence_scores.append(price_result['scores'][0] * confidence_weights['primary_classification'])

        # Location assessment confidence
        location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
        confidence_scores.append(location_confidence * confidence_weights['location_assessment'])

        # Age factor confidence
        if 'age_factor' in price_factors and 'depreciation_factor' in price_factors['age_factor']:
            age_confidence = price_factors['age_factor']['depreciation_factor']
            confidence_scores.append(age_confidence * confidence_weights['age_factor'])

        # Size factor confidence
        if 'size_factor' in price_factors:
            size_confidence = 0.8 if price_factors['size_factor']['efficiency'] == 'high' else 0.6
            confidence_scores.append(size_confidence * confidence_weights['size_factor'])

        # Amenities factor confidence
        if 'amenities_factor' in price_factors:
            amenities_confidence = price_factors['amenities_factor']['score']
            confidence_scores.append(amenities_confidence * confidence_weights['amenities_factor'])

        # Missing factors contribute nothing but the divisor is the full
        # weight total, so absent inputs lower the overall confidence.
        overall_confidence = sum(confidence_scores) / sum(confidence_weights.values())

        return {
            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not classify',
            'confidence': float(overall_confidence),
            'price': price,
            'formatted_price': f"₹{price:,.0f}",
            'price_per_sqft': price_per_sqft,
            'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
            'price_range': price_range,
            'location_price_assessment': location_assessment,
            'has_price': True,
            'market_trends': market_trends,
            'price_factors': price_factors,
            'risk_indicators': risk_indicators,
            'top_classifications': top_classifications
        }
    except Exception as e:
        logger.error(f"Error analyzing price: {str(e)}")
        return _empty_price_result('error', 'error')
 def analyze_legal_details(legal_text):
     try:
+        if not legal_text or len(legal_text.strip()) < 5:
+            return {
+                'assessment': 'insufficient',
                 'confidence': 0.0,
+                'summary': 'No legal details provided',
                 'completeness_score': 0,
+                'potential_issues': False,
+                'legal_metrics': {},
+                'reasoning': 'No legal details provided for analysis',
                 'top_classifications': []
             }
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         # Enhanced legal categories with more specific indicators
         categories = [
                 })
         # Generate summary using BART
+        summary = summarize_text(legal_text[:1000])
         # Calculate legal metrics with weighted scoring
         legal_metrics = {
             (1 - legal_metrics['risk_level']) * 0.2
         ))
+        return {
+            'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
+            'confidence': float(overall_confidence),
+            'summary': summary,
+            'completeness_score': int(completeness_score),
+            'potential_issues': potential_issues,
+            'legal_metrics': legal_metrics,
+            'reasoning': '. '.join(reasoning_parts),
+            'top_classifications': top_classifications
+        }
     except Exception as e:
         logger.error(f"Error analyzing legal details: {str(e)}")
         return {
+            'assessment': 'could not assess',
             'confidence': 0.0,
+            'summary': 'Error analyzing legal details',
             'completeness_score': 0,
+            'potential_issues': False,
+            'legal_metrics': {},
+            'reasoning': 'Technical error occurred during analysis',
             'top_classifications': []
         }
 def check_if_property_related(text):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         result = classifier(text[:1000], ["property-related", "non-property-related"])
         is_related = result['labels'][0] == "property-related"
         return {
             'confidence': 0.0
         }
 if __name__ == '__main__':
     # Local import keeps this entry-point block self-contained.
     import os

     # The Werkzeug interactive debugger lets anyone who can reach the port
     # execute arbitrary Python, and this server binds to every interface.
     # Debug mode is therefore off unless FLASK_DEBUG is explicitly truthy.
     debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')

     # Run Flask app (the auto-reloader stays disabled, as before)
     app.run(host='0.0.0.0', port=8000, debug=debug_enabled, use_reloader=False)