Commit b6fdb69 (verified) · Parent: 1d2d77d
sksameermujahid committed: Update app.py
Files changed (1): app.py (+842 −488)

app.py CHANGED
--- a/app.py
@@ -27,40 +27,24 @@ from pyngrok import ngrok
 import threading
 import asyncio
 import concurrent.futures
-from concurrent.futures import ThreadPoolExecutor
 
 app = Flask(__name__)
-CORS(app)
 
 # Configure logging
-log_dir = '/app/logs'
-os.makedirs(log_dir, exist_ok=True)
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     handlers=[
-        logging.StreamHandler(),
-        logging.FileHandler(os.path.join(log_dir, 'app.log'))
     ]
 )
-
 logger = logging.getLogger(__name__)
 
-# Set Hugging Face cache directory
-os.environ['TRANSFORMERS_CACHE'] = '/app/cache'
-os.environ['HF_HOME'] = '/app/cache'
-os.environ['XDG_CACHE_HOME'] = '/app/cache'
-
 # Initialize geocoder
 geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
 
-# Model instances
-clip_processor = None
-clip_model = None
-sentence_model = None
-nlp = None
-zero_shot_classifier = None
-
 # Cache models
 @lru_cache(maxsize=10)
 def load_model(task, model_name):
@@ -71,46 +55,31 @@ def load_model(task, model_name):
         logger.error(f"Error loading model {model_name}: {str(e)}")
         raise
 
-def get_clip_model():
-    global clip_processor, clip_model
-    if clip_processor is None or clip_model is None:
-        try:
-            clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
-            clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
-            logger.info("CLIP model loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading CLIP model: {str(e)}")
-    return clip_processor, clip_model
-
-def get_sentence_model():
-    global sentence_model
-    if sentence_model is None:
-        try:
-            sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
-            logger.info("Sentence transformer loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading sentence transformer: {str(e)}")
-    return sentence_model
-
-def get_spacy_model():
-    global nlp
-    if nlp is None:
-        try:
-            nlp = spacy.load('en_core_web_sm')
-            logger.info("spaCy model loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading spaCy model: {str(e)}")
-    return nlp
-
-def get_zero_shot_classifier():
-    global zero_shot_classifier
-    if zero_shot_classifier is None:
-        try:
-            zero_shot_classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
-            logger.info("Zero-shot classifier loaded successfully")
-        except Exception as e:
-            logger.error(f"Error loading zero-shot classifier: {str(e)}")
-    return zero_shot_classifier
 
 def make_json_serializable(obj):
     try:
@@ -126,8 +95,6 @@ def make_json_serializable(obj):
             return obj.item() if hasattr(obj, 'item') else float(obj)
         elif isinstance(obj, np.ndarray):
             return obj.tolist()
-        elif isinstance(obj, np.bool_):
-            return bool(obj)
         else:
            return str(obj)
    except Exception as e:
@@ -233,232 +200,372 @@ def get_location():
         }), 500
 
 def calculate_final_verdict(results):
     try:
         # Initialize verdict components
         verdict = {
             'status': 'unknown',
-            'score': 0.0,
             'confidence': 0.0,
             'reasons': [],
-            'warnings': [],
             'critical_issues': [],
             'recommendations': []
         }
 
-        # Calculate base score from different analyses
-        scores = []
-        weights = []
-
-        # Location analysis
-        if 'location_analysis' in results:
-            loc_score = results['location_analysis'].get('completeness_score', 0)
-            scores.append(loc_score)
-            weights.append(0.3)
-
-        # Price analysis
-        if 'price_analysis' in results:
-            price_conf = results['price_analysis'].get('confidence', 0)
-            scores.append(price_conf * 100)
-            weights.append(0.2)
-
-        # Legal analysis
-        if 'legal_analysis' in results:
-            legal_score = results['legal_analysis'].get('completeness_score', 0)
-            scores.append(legal_score)
-            weights.append(0.3)
-
-        # Specs analysis
-        if 'specs_analysis' in results:
-            specs_score = results['specs_analysis'].get('verification_score', 0)
-            scores.append(specs_score)
-            weights.append(0.2)
-
-        # Calculate weighted average score
-        if scores and weights:
-            verdict['score'] = sum(s * w for s, w in zip(scores, weights)) / sum(weights)
-            verdict['confidence'] = min(1.0, len(scores) / 4.0)  # Confidence based on available analyses
-
-        # Determine status based on score
-        if verdict['score'] >= 80:
-            verdict['status'] = 'verified'
-        elif verdict['score'] >= 60:
-            verdict['status'] = 'partially_verified'
         else:
-            verdict['status'] = 'unverified'
-
-        # Add reasons and recommendations
-        if 'location_analysis' in results:
-            verdict['reasons'].append(f"Location verification: {results['location_analysis'].get('assessment', 'unknown')}")
-            if results['location_analysis'].get('location_quality') != 'verified':
-                verdict['warnings'].append("Location details need verification")
-
-        if 'price_analysis' in results:
-            verdict['reasons'].append(f"Price assessment: {results['price_analysis'].get('assessment', 'unknown')}")
-            if results['price_analysis'].get('assessment') == 'suspiciously high price':
-                verdict['warnings'].append("Property price seems unusually high for the area")
-
-        if 'legal_analysis' in results:
-            verdict['reasons'].append(f"Legal assessment: {results['legal_analysis'].get('assessment', 'unknown')}")
-            if results['legal_analysis'].get('potential_issues'):
-                verdict['critical_issues'].append("Potential legal issues detected")
-
-        if 'specs_analysis' in results:
-            verdict['reasons'].append(f"Specifications verification: {'valid' if results['specs_analysis'].get('is_valid') else 'invalid'}")
-            if not results['specs_analysis'].get('is_valid'):
-                verdict['warnings'].extend(results['specs_analysis'].get('issues', []))
-
-        # Add recommendations
-        if verdict['status'] == 'unverified':
-            verdict['recommendations'].append("Additional verification required")
-        if verdict['warnings']:
-            verdict['recommendations'].append("Address the warnings before proceeding")
-        if verdict['critical_issues']:
-            verdict['recommendations'].append("Resolve critical issues before proceeding")
 
         return verdict
-
     except Exception as e:
         logger.error(f"Error calculating final verdict: {str(e)}")
         return {
             'status': 'error',
-            'score': 0.0,
             'confidence': 0.0,
             'reasons': [f"Error calculating verdict: {str(e)}"],
-            'warnings': [],
             'critical_issues': [],
             'recommendations': ["Unable to determine property status due to an error"]
         }
 
 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
-        results = {
-            'location_analysis': {},
-            'price_analysis': {},
-            'specifications_analysis': {},
-            'legal_details_analysis': {},
-            'image_analysis': [],
-            'pdf_analysis': [],
-            'final_verdict': {},
-            'suggestions': []
-        }
 
-        # Check if request is JSON
-        if request.is_json:
-            data = request.get_json()
-        else:
-            data = {
-                'property_name': request.form.get('property_name', '').strip(),
-                'property_type': request.form.get('property_type', '').strip(),
-                'status': request.form.get('status', '').strip(),
-                'address': request.form.get('address', '').strip(),
-                'city': request.form.get('city', '').strip(),
-                'state': request.form.get('state', '').strip(),
-                'country': request.form.get('country', 'India').strip(),
-                'pincode': request.form.get('pincode', '').strip(),
-                'price': request.form.get('price', '').strip(),
-                'market_value': request.form.get('market_value', '').strip(),
-                'area': request.form.get('area', '').strip(),
-                'bedrooms': request.form.get('bedrooms', '').strip(),
-                'bathrooms': request.form.get('bathrooms', '').strip(),
-                'floors': request.form.get('floors', '').strip(),
-                'furnishing': request.form.get('furnishing', '').strip(),
-                'parking': request.form.get('parking', '').strip(),
-                'description': request.form.get('description', '').strip(),
-                'legal_status': request.form.get('legal_status', '').strip(),
-                'possession': request.form.get('possession', '').strip(),
-                'age': request.form.get('age', '').strip(),
-                'facing': request.form.get('facing', '').strip(),
-                'amenities': request.form.get('amenities', '').strip()
-            }
 
         # Validate required fields
         required_fields = ['property_name', 'property_type', 'address', 'city', 'state']
-        missing_fields = [field for field in required_fields if not data.get(field)]
         if missing_fields:
             return jsonify({
-                'error': 'Missing required fields',
-                'missing_fields': missing_fields
             }), 400
 
-        # Process images and PDFs in parallel
-        with ThreadPoolExecutor(max_workers=4) as executor:
-            # Process images
-            image_files = request.files.getlist('images')
-            if image_files:
-                image_futures = []
-                for img_file in image_files:
-                    future = executor.submit(process_image, img_file)
-                    image_futures.append(future)
-
-                for future in concurrent.futures.as_completed(image_futures):
                 try:
-                    result = future.result()
-                    results['image_analysis'].append(result)
                 except Exception as e:
-                    logger.error(f"Error processing image: {str(e)}")
-                    results['image_analysis'].append({'error': str(e), 'is_property_related': False})
-
-            # Process PDFs
-            pdf_files = request.files.getlist('documents')
-            if pdf_files:
-                pdf_futures = []
-                for pdf_file in pdf_files:
-                    future = executor.submit(process_pdf, pdf_file, data)
-                    pdf_futures.append(future)
-
-                for future in concurrent.futures.as_completed(pdf_futures):
                 try:
-                    result = future.result()
-                    results['pdf_analysis'].append(result)
                 except Exception as e:
-                    logger.error(f"Error processing PDF: {str(e)}")
-                    results['pdf_analysis'].append({'error': str(e)})
-
-        # Run analysis tasks in parallel
-        with ThreadPoolExecutor(max_workers=4) as executor:
-            futures = {
-                'location_analysis': executor.submit(analyze_location, data),
-                'price_analysis': executor.submit(analyze_price, data),
-                'specifications_analysis': executor.submit(verify_property_specs, data),
-                'legal_details_analysis': executor.submit(analyze_legal_details, data.get('legal_status', ''))
-            }
-
-            for key, future in futures.items():
-                try:
-                    results[key] = future.result()
-                except Exception as e:
-                    logger.error(f"Error in {key}: {str(e)}")
-                    results[key] = {'error': str(e)}
 
-        # Calculate final verdict and suggestions
-        results['final_verdict'] = calculate_final_verdict(results)
-        results['suggestions'] = generate_suggestions(data.get('description', ''), data)
 
-        # Ensure all results are JSON serializable
-        serializable_results = make_json_serializable(results)
-        return jsonify(serializable_results)
 
-    except Exception as e:
-        logger.error(f"Error in verify_property: {str(e)}")
-        return jsonify({'error': str(e)}), 500
 
-def process_image(img_file):
-    try:
-        img = Image.open(img_file)
-        buffered = io.BytesIO()
-        img.save(buffered, format="JPEG")
-        img_str = base64.b64encode(buffered.getvalue()).decode()
-        return analyze_image(img)
-    except Exception as e:
-        raise Exception(f"Error processing image {img_file.filename}: {str(e)}")
 
-def process_pdf(pdf_file, data):
-    try:
-        pdf_text = extract_pdf_text(pdf_file)
-        return analyze_pdf_content(pdf_text, data)
     except Exception as e:
-        raise Exception(f"Error processing PDF {pdf_file.filename}: {str(e)}")
 
 def extract_pdf_text(pdf_file):
     try:
@@ -474,8 +581,7 @@ def extract_pdf_text(pdf_file):
 
 def analyze_image(image):
     try:
-        if clip_processor is None or clip_model is None:
-            get_clip_model()
         img_rgb = image.convert('RGB')
         inputs = clip_processor(
             text=[
@@ -511,6 +617,16 @@ def analyze_image(image):
             'is_ai_generated': is_ai_generated,
             'authenticity_score': 0.95 if not is_ai_generated else 0.60
         }
     except Exception as e:
         logger.error(f"Error analyzing image: {str(e)}")
         return {
542
 
543
  def analyze_pdf_content(document_text, property_data):
544
  try:
545
- if not document_text or len(document_text.strip()) < 10:
546
  return {
547
- 'document_type': 'unknown',
548
- 'confidence': 0.0,
549
  'key_info': {},
550
- 'summary': 'No text content found in document',
551
  'consistency_score': 0.0,
552
- 'risk_indicators': []
 
 
 
 
553
  }
554
 
555
- # Use sentence transformer for document classification
556
- sentence_model = get_sentence_model()
557
-
558
- # Define document types
559
- document_types = [
560
- "sale deed",
561
- "property tax receipt",
562
- "encumbrance certificate",
563
- "building approval",
564
- "occupancy certificate",
565
- "power of attorney",
566
- "lease agreement",
567
- "will",
568
- "gift deed",
569
- "partition deed"
570
- ]
571
 
572
- # Convert document types to embeddings
573
- type_embeddings = sentence_model.encode(document_types)
574
- doc_embedding = sentence_model.encode(document_text)
575
-
576
- # Calculate similarities
577
- similarities = util.pytorch_cos_sim(doc_embedding, type_embeddings)[0]
578
- doc_type_idx = similarities.argmax().item()
579
- doc_type = document_types[doc_type_idx]
580
- confidence = float(similarities[doc_type_idx])
581
 
582
- # Extract key information
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
  key_info = extract_document_key_info(document_text)
584
 
585
- # Generate summary
586
- summary = summarize_text(document_text)
587
-
588
- # Check consistency with property data
589
  consistency_score = check_document_consistency(document_text, property_data)
590
 
591
- # Identify risk indicators
592
- risk_indicators = []
593
- if consistency_score < 0.7:
594
- risk_indicators.append("Document content inconsistent with property details")
595
- if confidence < 0.6:
596
- risk_indicators.append("Low confidence in document type identification")
597
- if len(key_info) < 3:
598
- risk_indicators.append("Limited key information extracted")
599
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
  return {
601
- 'document_type': doc_type,
602
- 'confidence': confidence,
603
  'key_info': key_info,
 
 
604
  'summary': summary,
605
- 'consistency_score': consistency_score,
606
- 'risk_indicators': risk_indicators
 
607
  }
608
  except Exception as e:
609
  logger.error(f"Error analyzing PDF content: {str(e)}")
610
  return {
611
- 'document_type': 'error',
612
- 'confidence': 0.0,
613
  'key_info': {},
614
- 'summary': f'Error analyzing document: {str(e)}',
615
  'consistency_score': 0.0,
616
- 'risk_indicators': ['Error in document analysis']
 
 
 
 
 
617
  }
618
 
619
- def summarize_text(text):
620
- try:
621
- if not text or len(text.strip()) < 10:
622
- return "No text to summarize."
623
-
624
- # Use sentence transformer for summarization
625
- sentence_model = get_sentence_model()
626
-
627
- # Split text into sentences
628
- sentences = text.split('.')
629
- sentences = [s.strip() for s in sentences if s.strip()]
630
-
631
- if not sentences:
632
- return "No valid sentences found."
633
-
634
- # Get sentence embeddings
635
- sentence_embeddings = sentence_model.encode(sentences)
636
-
637
- # Calculate sentence importance (using first sentence and average similarity)
638
- first_sentence_embedding = sentence_embeddings[0]
639
- similarities = util.pytorch_cos_sim(first_sentence_embedding, sentence_embeddings)[0]
640
- avg_similarity = similarities.mean().item()
641
-
642
- # Select important sentences
643
- important_sentences = []
644
- for i, (sentence, similarity) in enumerate(zip(sentences, similarities)):
645
- if similarity > avg_similarity * 0.8: # 80% of average similarity
646
- important_sentences.append(sentence)
647
- if len(important_sentences) >= 3: # Limit to 3 sentences
648
- break
649
-
650
- return '. '.join(important_sentences) + '.'
651
- except Exception as e:
652
- logger.error(f"Error summarizing text: {str(e)}")
653
- return "Error generating summary."
654
-
655
  def check_document_consistency(document_text, property_data):
656
  try:
657
- if sentence_model is None:
658
- get_sentence_model()
 
659
  property_text = ' '.join([
660
  property_data.get(key, '') for key in [
661
  'property_name', 'property_type', 'address', 'city',
@@ -708,7 +817,7 @@ def generate_property_summary(data):
     """
 
     # Use BART for summary generation
-    summarizer = get_sentence_model()
 
     # Generate initial summary
     summary_result = summarizer(property_context, max_length=150, min_length=50, do_sample=False)
@@ -762,6 +871,20 @@ def generate_property_summary(data):
         logger.error(f"Error generating property summary: {str(e)}")
         return "Could not generate summary."
 
 def classify_fraud(property_details, description):
     """
     Classify the risk of fraud in a property listing using zero-shot classification.
@@ -791,7 +914,7 @@ def classify_fraud(property_details, description):
     ]
 
     # Perform zero-shot classification
-    classifier = get_zero_shot_classifier()
     result = classifier(text_to_analyze, risk_categories, multi_label=True)
 
     # Process classification results
@@ -903,7 +1026,7 @@ def classify_fraud(property_details, description):
 
 def generate_trust_score(text, image_analysis, pdf_analysis):
     try:
-        classifier = get_zero_shot_classifier()
         aspects = [
             "complete information provided",
             "verified location",
@@ -1019,14 +1142,14 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
 
 def generate_suggestions(text, data=None):
     try:
-        classifier = get_zero_shot_classifier()
 
         # Create comprehensive context for analysis
         suggestion_context = text
         if data:
             suggestion_context += f"""
             Additional Context:
-            Property Type: {data.get('property_type', '')} "
             Location: {data.get('city', '')}, {data.get('state', '')}
             Size: {data.get('sq_ft', '')} sq.ft.
             Year Built: {data.get('year_built', '')}
@@ -1267,7 +1390,7 @@ def assess_text_quality(text):
             'quality_metrics': {}
         }
 
-        classifier = get_zero_shot_classifier()
 
        # Enhanced quality categories with more specific indicators
        quality_categories = [
@@ -1811,124 +1934,396 @@ def perform_cross_validation(data):
 
 def analyze_location(data):
     try:
-        address = data.get('address', '')
-        city = data.get('city', '')
-        state = data.get('state', '')
-        country = data.get('country', 'India')
-        pincode = data.get('pincode', '')
-        zip_code = data.get('zip', pincode)  # Use pincode if zip is not provided
-
-        # Build location text
-        location_parts = [part for part in [address, city, state, country, zip_code] if part]
-        location_text = ', '.join(location_parts)
-
-        # Geocode location
-        coordinates = None
-        try:
-            location = geocoder.geocode(location_text, timeout=10)
-            if location:
-                coordinates = {
-                    'latitude': location.latitude,
-                    'longitude': location.longitude
-                }
-        except Exception as e:
-            logger.error(f"Error geocoding location: {str(e)}")
 
         return {
-            'address': address,
-            'city': city,
-            'state': state,
-            'country': country,
-            'pincode': pincode,
-            'zip': zip_code,
-            'coordinates': coordinates,
-            'completeness_score': calculate_location_completeness(data)
         }
     except Exception as e:
         logger.error(f"Error analyzing location: {str(e)}")
-        return {'error': str(e)}
 
 def calculate_location_completeness(data):
     try:
-        # Define weights for different fields
-        weights = {
-            'address': 0.2,
-            'city': 0.2,
-            'state': 0.2,
-            'country': 0.1,
-            'pincode': 0.15,
-            'zip': 0.15
         }
 
-        # Calculate completeness score
-        score = 0
-        for field, weight in weights.items():
-            if data.get(field):
-                score += weight
 
-        return min(100, score * 100)  # Convert to percentage, max 100
-    except Exception as e:
-        logger.error(f"Error calculating location completeness: {str(e)}")
-        return 0
-
-def analyze_price(data):
-    try:
-        # Handle empty or invalid price values
-        price_str = data.get('price', '0').strip()
-        market_value_str = data.get('market_value', price_str).strip()
-        area_str = data.get('area', '0').strip()
 
-        # Remove currency symbols and commas
-        price_str = price_str.replace('₹', '').replace('$', '').replace(',', '')
-        market_value_str = market_value_str.replace('₹', '').replace('$', '').replace(',', '')
-        area_str = area_str.replace(',', '')
 
-        # Convert to float with safe defaults
-        price = float(price_str) if price_str and price_str.replace('.', '').isdigit() else 0
-        market_value = float(market_value_str) if market_value_str and market_value_str.replace('.', '').isdigit() else price
-        area = float(area_str) if area_str and area_str.replace('.', '').isdigit() else 0
 
-        # Calculate price per sqft
-        price_per_sqft = price / area if area > 0 else 0
 
         return {
             'price': price,
-            'area': area,
             'price_per_sqft': price_per_sqft,
-            'market_value': market_value,
-            'price_comparison': {
-                'is_reasonable': price <= market_value * 1.1 if market_value > 0 else True,
-                'price_difference': market_value - price,
-                'price_difference_percentage': ((market_value - price) / market_value) * 100 if market_value > 0 else 0
-            }
         }
     except Exception as e:
         logger.error(f"Error analyzing price: {str(e)}")
-        return {'error': str(e)}
 
 def analyze_legal_details(legal_text):
     try:
-        # Initialize analysis results
-        analysis = {
-            'assessment': 'unknown',
             'confidence': 0.0,
             'completeness_score': 0,
-            'potential_issues': False,  # Changed to lowercase false
-            'reasoning': '',
-            'summary': '',
-            'legal_metrics': {
-                'completeness': 0.0,
-                'compliance': 0.0,
-                'documentation_quality': 0.0,
-                'risk_level': 0.0
-            },
             'top_classifications': []
         }
 
-        if not legal_text or len(legal_text.strip()) < 5:
-            return analysis
-
-        classifier = get_zero_shot_classifier()
 
        # Enhanced legal categories with more specific indicators
        categories = [
@@ -1977,9 +2372,7 @@ def analyze_legal_details(legal_text):
         })
 
         # Generate summary using BART
-        summarizer = get_sentence_model()
-        summary = summarizer(legal_text[:1000], max_length=150, min_length=50, do_sample=False)
-        initial_summary = summary[0]['summary_text']
 
         # Calculate legal metrics with weighted scoring
         legal_metrics = {
@@ -2048,36 +2441,26 @@ def analyze_legal_details(legal_text):
             (1 - legal_metrics['risk_level']) * 0.2
         ))
 
-        analysis['assessment'] = top_classifications[0]['classification'] if top_classifications else 'could not assess'
-        analysis['confidence'] = float(overall_confidence)
-        analysis['summary'] = initial_summary
-        analysis['completeness_score'] = int(completeness_score)
-        analysis['potential_issues'] = potential_issues
-        analysis['legal_metrics'] = legal_metrics
-        analysis['reasoning'] = '. '.join(reasoning_parts)
-        analysis['top_classifications'] = top_classifications
-
-        # Update potential_issues based on analysis
-        if potential_issues:
-            analysis['potential_issues'] = True  # Changed to lowercase true
-
-        return analysis
-
     except Exception as e:
         logger.error(f"Error analyzing legal details: {str(e)}")
         return {
-            'assessment': 'error',
             'confidence': 0.0,
             'completeness_score': 0,
-            'potential_issues': False,  # Changed to lowercase false
-            'reasoning': f'Error analyzing legal details: {str(e)}',
-            'summary': '',
-            'legal_metrics': {
-                'completeness': 0.0,
-                'compliance': 0.0,
-                'documentation_quality': 0.0,
-                'risk_level': 0.0
-            },
             'top_classifications': []
         }
@@ -2561,7 +2944,7 @@ def assess_image_quality(img):
 
 def check_if_property_related(text):
     try:
-        classifier = get_zero_shot_classifier()
         result = classifier(text[:1000], ["property-related", "non-property-related"])
         is_related = result['labels'][0] == "property-related"
         return {
@@ -2575,35 +2958,6 @@ def check_if_property_related(text):
             'confidence': 0.0
         }
 
-# Optimize model loading
-def load_models_in_background():
-    """Load models in background to avoid blocking the main thread"""
-    def load_models():
-        try:
-            # Load models in parallel
-            with ThreadPoolExecutor(max_workers=4) as executor:
-                futures = [
-                    executor.submit(get_clip_model),
-                    executor.submit(get_sentence_model),
-                    executor.submit(get_spacy_model),
-                    executor.submit(get_zero_shot_classifier)
-                ]
-
-                # Wait for all models to load
-                for future in concurrent.futures.as_completed(futures):
-                    try:
-                        future.result()
-                    except Exception as e:
-                        logger.error(f"Error loading model: {str(e)}")
-        except Exception as e:
-            logger.error(f"Error in background model loading: {str(e)}")
-
-    # Start model loading in background
-    threading.Thread(target=load_models, daemon=True).start()
-
-# Start model loading when the app starts
-load_models_in_background()
-
 if __name__ == '__main__':
     # Run Flask app
     app.run(host='0.0.0.0', port=8000, debug=True, use_reloader=False)
+++ b/app.py
 import threading
 import asyncio
 import concurrent.futures
 
 app = Flask(__name__)
+CORS(app)  # Enable CORS for frontend
 
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
+        logging.FileHandler('app.log'),
+        logging.StreamHandler()
     ]
 )
 logger = logging.getLogger(__name__)
 
 # Initialize geocoder
 geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
 
 # Cache models
 @lru_cache(maxsize=10)
 def load_model(task, model_name):
 
         logger.error(f"Error loading model {model_name}: {str(e)}")
         raise
 
+# Initialize CLIP model
+try:
+    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+    has_clip_model = True
+    logger.info("CLIP model loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading CLIP model: {str(e)}")
+    has_clip_model = False
+
+# Initialize sentence transformer
+try:
+    sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+    logger.info("Sentence transformer loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading sentence transformer: {str(e)}")
+    sentence_model = None
+
+# Initialize spaCy
+try:
+    nlp = spacy.load('en_core_web_md')
+    logger.info("spaCy model loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading spaCy model: {str(e)}")
+    nlp = None
 
 def make_json_serializable(obj):
     try:
 
         return obj.item() if hasattr(obj, 'item') else float(obj)
     elif isinstance(obj, np.ndarray):
         return obj.tolist()
     else:
         return str(obj)
     except Exception as e:
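For context: load_model is wrapped in @lru_cache(maxsize=10), so each (task, model_name) pair is constructed once and reused across requests. A minimal sketch of that pattern follows; the function body is elided in this diff, so the pipeline(...) call here is an assumption based on how load_model is invoked elsewhere in the file:

    from functools import lru_cache
    from transformers import pipeline

    @lru_cache(maxsize=10)
    def load_model(task, model_name):
        # Assumed body: build a transformers pipeline once; repeated calls with
        # the same (task, model_name) arguments return the cached instance.
        return pipeline(task, model=model_name)

    a = load_model("zero-shot-classification", "facebook/bart-large-mnli")
    b = load_model("zero-shot-classification", "facebook/bart-large-mnli")
    assert a is b  # loaded once, served from the cache afterwards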
         }), 500
 
 def calculate_final_verdict(results):
+    """
+    Calculate a comprehensive final verdict based on all analysis results.
+    This function combines all verification scores, fraud indicators, and quality assessments
+    to determine if a property listing is legitimate, suspicious, or fraudulent.
+    """
     try:
         # Initialize verdict components
         verdict = {
             'status': 'unknown',
             'confidence': 0.0,
+            'score': 0.0,
             'reasons': [],
             'critical_issues': [],
+            'warnings': [],
             'recommendations': []
         }
 
+        # Extract key components from results
+        trust_score = results.get('trust_score', {}).get('score', 0)
+        fraud_classification = results.get('fraud_classification', {})
+        quality_assessment = results.get('quality_assessment', {})
+        specs_verification = results.get('specs_verification', {})
+        cross_validation = results.get('cross_validation', [])
+        location_analysis = results.get('location_analysis', {})
+        price_analysis = results.get('price_analysis', {})
+        legal_analysis = results.get('legal_analysis', {})
+        document_analysis = results.get('document_analysis', {})
+        image_analysis = results.get('image_analysis', {})
+
+        # Calculate component scores (0-100)
+        component_scores = {
+            'trust': trust_score,
+            'fraud': 100 - (fraud_classification.get('alert_score', 0) * 100),
+            'quality': quality_assessment.get('score', 0),
+            'specs': specs_verification.get('verification_score', 0),
+            'location': location_analysis.get('completeness_score', 0),
+            'price': price_analysis.get('confidence', 0) * 100 if price_analysis.get('has_price') else 0,
+            'legal': legal_analysis.get('completeness_score', 0),
+            'documents': min(100, (document_analysis.get('pdf_count', 0) / 3) * 100) if document_analysis.get('pdf_count') else 0,
+            'images': min(100, (image_analysis.get('image_count', 0) / 5) * 100) if image_analysis.get('image_count') else 0
+        }
+
+        # Calculate weighted final score with adjusted weights
+        weights = {
+            'trust': 0.20,
+            'fraud': 0.25,  # Increased weight for fraud detection
+            'quality': 0.15,
+            'specs': 0.10,
+            'location': 0.10,
+            'price': 0.05,
+            'legal': 0.05,
+            'documents': 0.05,
+            'images': 0.05
+        }
+
+        final_score = sum(score * weights.get(component, 0) for component, score in component_scores.items())
+        verdict['score'] = final_score
+
+        # Determine verdict status based on multiple factors
+        fraud_level = fraud_classification.get('alert_level', 'minimal')
+        high_risk_indicators = len(fraud_classification.get('high_risk', []))
+        critical_issues = []
+        warnings = []
+
+        # Check for critical issues
+        if fraud_level in ['critical', 'high']:
+            critical_issues.append(f"High fraud risk detected: {fraud_level} alert level")
+
+        if trust_score < 40:
+            critical_issues.append(f"Very low trust score: {trust_score}%")
+
+        if quality_assessment.get('score', 0) < 30:
+            critical_issues.append(f"Very low content quality: {quality_assessment.get('score', 0)}%")
+
+        if specs_verification.get('verification_score', 0) < 40:
+            critical_issues.append(f"Property specifications verification failed: {specs_verification.get('verification_score', 0)}%")
+
+        # Check for warnings
+        if fraud_level == 'medium':
+            warnings.append(f"Medium fraud risk detected: {fraud_level} alert level")
+
+        if trust_score < 60:
+            warnings.append(f"Low trust score: {trust_score}%")
+
+        if quality_assessment.get('score', 0) < 60:
+            warnings.append(f"Low content quality: {quality_assessment.get('score', 0)}%")
+
+        if specs_verification.get('verification_score', 0) < 70:
+            warnings.append(f"Property specifications have issues: {specs_verification.get('verification_score', 0)}%")
+
+        # Check cross-validation results
+        for check in cross_validation:
+            if check.get('status') in ['inconsistent', 'invalid', 'suspicious', 'no_match']:
+                warnings.append(f"Cross-validation issue: {check.get('message', 'Unknown issue')}")
+
+        # Check for missing critical information
+        missing_critical = []
+        if not location_analysis.get('completeness_score', 0) > 70:
+            missing_critical.append("Location information is incomplete")
+
+        if not price_analysis.get('has_price', False):
+            missing_critical.append("Price information is missing")
+
+        if not legal_analysis.get('completeness_score', 0) > 70:
+            missing_critical.append("Legal information is incomplete")
+
+        if document_analysis.get('pdf_count', 0) == 0:
+            missing_critical.append("No supporting documents provided")
+
+        if image_analysis.get('image_count', 0) == 0:
+            missing_critical.append("No property images provided")
+
+        if missing_critical:
+            warnings.append(f"Missing critical information: {', '.join(missing_critical)}")
+
+        # Enhanced verdict determination with more strict criteria
+        if critical_issues or (fraud_level in ['critical', 'high'] and trust_score < 50) or high_risk_indicators > 0:
+            verdict['status'] = 'fraudulent'
+            verdict['confidence'] = min(100, max(70, 100 - (trust_score * 0.5)))
+        elif warnings or (fraud_level == 'medium' and trust_score < 70) or specs_verification.get('verification_score', 0) < 60:
+            verdict['status'] = 'suspicious'
+            verdict['confidence'] = min(100, max(50, trust_score * 0.8))
+        else:
+            verdict['status'] = 'legitimate'
+            verdict['confidence'] = min(100, max(70, trust_score * 0.9))
+
+        # Add reasons to verdict
+        verdict['critical_issues'] = critical_issues
+        verdict['warnings'] = warnings
+
+        # Add recommendations based on issues
+        if critical_issues:
+            verdict['recommendations'].append("Do not proceed with this property listing")
+            verdict['recommendations'].append("Report this listing to the platform")
+        elif warnings:
+            verdict['recommendations'].append("Proceed with extreme caution")
+            verdict['recommendations'].append("Request additional verification documents")
+            verdict['recommendations'].append("Verify all information with independent sources")
         else:
+            verdict['recommendations'].append("Proceed with standard due diligence")
+            verdict['recommendations'].append("Verify final details before transaction")
+
+        # Add specific recommendations based on missing information
+        for missing in missing_critical:
+            verdict['recommendations'].append(f"Request {missing.lower()}")
 
         return verdict
     except Exception as e:
         logger.error(f"Error calculating final verdict: {str(e)}")
         return {
             'status': 'error',
             'confidence': 0.0,
+            'score': 0.0,
             'reasons': [f"Error calculating verdict: {str(e)}"],
             'critical_issues': [],
+            'warnings': [],
             'recommendations': ["Unable to determine property status due to an error"]
         }
 
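A standalone sketch of the weighted blend in the new calculate_final_verdict, using the weights from the hunk (they sum to 1.0) and illustrative component scores:

    component_scores = {'trust': 80, 'fraud': 90, 'quality': 70, 'specs': 60,
                        'location': 75, 'price': 50, 'legal': 40, 'documents': 33, 'images': 40}
    weights = {'trust': 0.20, 'fraud': 0.25, 'quality': 0.15, 'specs': 0.10,
               'location': 0.10, 'price': 0.05, 'legal': 0.05, 'documents': 0.05, 'images': 0.05}
    final_score = sum(s * weights.get(name, 0) for name, s in component_scores.items())
    # 80*0.20 + 90*0.25 + 70*0.15 + 60*0.10 + 75*0.10 + (50+40+33+40)*0.05 = 70.65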
 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
+        if not request.form and not request.files:
+            logger.warning("No form data or files provided")
+            return jsonify({
+                'error': 'No data provided',
+                'status': 'error'
+            }), 400
 
+        # Extract form data
+        data = {
+            'property_name': request.form.get('property_name', '').strip(),
+            'property_type': request.form.get('property_type', '').strip(),
+            'status': request.form.get('status', '').strip(),
+            'description': request.form.get('description', '').strip(),
+            'address': request.form.get('address', '').strip(),
+            'city': request.form.get('city', '').strip(),
+            'state': request.form.get('state', '').strip(),
+            'country': request.form.get('country', 'India').strip(),
+            'zip': request.form.get('zip', '').strip(),
+            'latitude': request.form.get('latitude', '').strip(),
+            'longitude': request.form.get('longitude', '').strip(),
+            'bedrooms': request.form.get('bedrooms', '').strip(),
+            'bathrooms': request.form.get('bathrooms', '').strip(),
+            'total_rooms': request.form.get('total_rooms', '').strip(),
+            'year_built': request.form.get('year_built', '').strip(),
+            'parking': request.form.get('parking', '').strip(),
+            'sq_ft': request.form.get('sq_ft', '').strip(),
+            'market_value': request.form.get('market_value', '').strip(),
+            'amenities': request.form.get('amenities', '').strip(),
+            'nearby_landmarks': request.form.get('nearby_landmarks', '').strip(),
+            'legal_details': request.form.get('legal_details', '').strip()
+        }
 
         # Validate required fields
         required_fields = ['property_name', 'property_type', 'address', 'city', 'state']
+        missing_fields = [field for field in required_fields if not data[field]]
         if missing_fields:
+            logger.warning(f"Missing required fields: {', '.join(missing_fields)}")
             return jsonify({
+                'error': f"Missing required fields: {', '.join(missing_fields)}",
+                'status': 'error'
             }), 400
 
+        # Process images
+        images = []
+        image_analysis = []
+        if 'images' in request.files:
+            # Get unique image files by filename to prevent duplicates
+            image_files = {}
+            for img_file in request.files.getlist('images'):
+                if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
+                    image_files[img_file.filename] = img_file
+
+            # Process unique images
+            for img_file in image_files.values():
                 try:
+                    img = Image.open(img_file)
+                    buffered = io.BytesIO()
+                    img.save(buffered, format="JPEG")
+                    img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+                    images.append(img_str)
+                    image_analysis.append(analyze_image(img))
                 except Exception as e:
+                    logger.error(f"Error processing image {img_file.filename}: {str(e)}")
+                    image_analysis.append({'error': str(e), 'is_property_related': False})
+
+        # Process PDFs
+        pdf_texts = []
+        pdf_analysis = []
+        if 'documents' in request.files:
+            # Get unique PDF files by filename to prevent duplicates
+            pdf_files = {}
+            for pdf_file in request.files.getlist('documents'):
+                if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
+                    pdf_files[pdf_file.filename] = pdf_file
+
+            # Process unique PDFs
+            for pdf_file in pdf_files.values():
                 try:
+                    pdf_text = extract_pdf_text(pdf_file)
+                    pdf_texts.append({
+                        'filename': pdf_file.filename,
+                        'text': pdf_text
+                    })
+                    pdf_analysis.append(analyze_pdf_content(pdf_text, data))
                 except Exception as e:
+                    logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
+                    pdf_analysis.append({'error': str(e)})
+
+        # Create consolidated text for analysis
+        consolidated_text = f"""
+        Property Name: {data['property_name']}
+        Property Type: {data['property_type']}
+        Status: {data['status']}
+        Description: {data['description']}
+        Location: {data['address']}, {data['city']}, {data['state']}, {data['country']}, {data['zip']}
+        Coordinates: Lat {data['latitude']}, Long {data['longitude']}
+        Specifications: {data['bedrooms']} bedrooms, {data['bathrooms']} bathrooms, {data['total_rooms']} total rooms
+        Year Built: {data['year_built']}
+        Parking: {data['parking']}
+        Size: {data['sq_ft']} sq. ft.
+        Market Value: ₹{data['market_value']}
+        Amenities: {data['amenities']}
+        Nearby Landmarks: {data['nearby_landmarks']}
+        Legal Details: {data['legal_details']}
+        """
 
+        # Process description translation if needed
+        try:
+            description = data['description']
+            if description and len(description) > 10:
+                text_language = detect(description)
+                if text_language != 'en':
+                    translated_description = GoogleTranslator(source=text_language, target='en').translate(description)
+                    data['description_translated'] = translated_description
+                else:
+                    data['description_translated'] = description
+            else:
+                data['description_translated'] = description
+        except Exception as e:
+            logger.error(f"Error in language detection/translation: {str(e)}")
+            data['description_translated'] = data['description']
+
+        # Run all analyses in parallel using asyncio
+        async def run_analyses():
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                loop = asyncio.get_event_loop()
+                tasks = [
+                    loop.run_in_executor(executor, generate_property_summary, data),
+                    loop.run_in_executor(executor, classify_fraud, consolidated_text, data),
+                    loop.run_in_executor(executor, generate_trust_score, consolidated_text, image_analysis, pdf_analysis),
+                    loop.run_in_executor(executor, generate_suggestions, consolidated_text, data),
+                    loop.run_in_executor(executor, assess_text_quality, data['description_translated']),
+                    loop.run_in_executor(executor, verify_address, data),
+                    loop.run_in_executor(executor, perform_cross_validation, data),
+                    loop.run_in_executor(executor, analyze_location, data),
+                    loop.run_in_executor(executor, analyze_price, data),
+                    loop.run_in_executor(executor, analyze_legal_details, data['legal_details']),
+                    loop.run_in_executor(executor, verify_property_specs, data),
+                    loop.run_in_executor(executor, analyze_market_value, data)
+                ]
+                results = await asyncio.gather(*tasks)
+                return results
+
+        # Run analyses and get results
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        analysis_results = loop.run_until_complete(run_analyses())
+        loop.close()
+
+        # Unpack results
+        summary, fraud_classification, (trust_score, trust_reasoning), suggestions, quality_assessment, \
+        address_verification, cross_validation, location_analysis, price_analysis, legal_analysis, \
+        specs_verification, market_analysis = analysis_results
+
+        # Prepare response
+        document_analysis = {
+            'pdf_count': len(pdf_texts),
+            'pdf_texts': pdf_texts,
+            'pdf_analysis': pdf_analysis
+        }
+        image_results = {
+            'image_count': len(images),
+            'image_analysis': image_analysis
+        }
 
+        report_id = str(uuid.uuid4())
 
+        # Create results dictionary
+        results = {
+            'report_id': report_id,
+            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            'summary': summary,
+            'fraud_classification': fraud_classification,
+            'trust_score': {
+                'score': trust_score,
+                'reasoning': trust_reasoning
+            },
+            'suggestions': suggestions,
+            'quality_assessment': quality_assessment,
+            'address_verification': address_verification,
+            'cross_validation': cross_validation,
+            'location_analysis': location_analysis,
+            'price_analysis': price_analysis,
+            'legal_analysis': legal_analysis,
+            'document_analysis': document_analysis,
+            'image_analysis': image_results,
+            'specs_verification': specs_verification,
+            'market_analysis': market_analysis,
+            'images': images
+        }
+
+        # Calculate final verdict
+        final_verdict = calculate_final_verdict(results)
+        results['final_verdict'] = final_verdict
 
+        return jsonify(make_json_serializable(results))
 
     except Exception as e:
+        logger.error(f"Error in verify_property: {str(e)}")
+        return jsonify({
+            'error': 'Server error occurred. Please try again later.',
+            'status': 'error',
+            'details': str(e)
+        }), 500
 
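A hypothetical client call against the rewritten endpoint. The field names match the form keys read by verify_property; the host, file names, and form values are assumptions (port 8000 comes from the app.run(...) line at the bottom of the file):

    import requests

    form = {
        'property_name': 'Green Acres Apartment',   # required
        'property_type': 'Apartment',               # required
        'address': '12 MG Road',                    # required
        'city': 'Bengaluru',                        # required
        'state': 'Karnataka',                       # required
        'sq_ft': '1200',
        'market_value': '7500000',
    }
    files = [
        ('images', open('front.jpg', 'rb')),         # only .jpg/.jpeg/.png are kept
        ('documents', open('sale_deed.pdf', 'rb')),  # only .pdf is kept
    ]
    resp = requests.post('http://localhost:8000/verify', data=form, files=files)
    print(resp.json()['final_verdict']['status'])  # legitimate / suspicious / fraudulent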
 def extract_pdf_text(pdf_file):
     try:
 
 def analyze_image(image):
     try:
+        if has_clip_model:
             img_rgb = image.convert('RGB')
             inputs = clip_processor(
                 text=[
 
                 'is_ai_generated': is_ai_generated,
                 'authenticity_score': 0.95 if not is_ai_generated else 0.60
             }
+        else:
+            logger.warning("CLIP model unavailable")
+            return {
+                'is_property_related': False,
+                'property_confidence': 0.0,
+                'top_predictions': [],
+                'image_quality': assess_image_quality(image),
+                'is_ai_generated': False,
+                'authenticity_score': 0.5
+            }
     except Exception as e:
         logger.error(f"Error analyzing image: {str(e)}")
         return {
 
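The hunk above only gates the existing CLIP path behind has_clip_model; the actual prompt list and scoring are elided. For reference, a sketch of the standard CLIP zero-shot scoring flow the gated branch relies on (the two text labels here are placeholders, not the ones in app.py):

    import torch

    def clip_label_probs(img):
        if not has_clip_model:  # same guard as analyze_image
            return None
        inputs = clip_processor(
            text=["a photo of a house or apartment", "an unrelated photo"],
            images=img.convert('RGB'),
            return_tensors="pt",
            padding=True
        )
        with torch.no_grad():
            logits = clip_model(**inputs).logits_per_image
        return logits.softmax(dim=1)[0].tolist()  # one probability per label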
 def analyze_pdf_content(document_text, property_data):
     try:
+        if not document_text:
             return {
+                'document_type': {'classification': 'unknown', 'confidence': 0.0},
+                'authenticity': {'assessment': 'could not verify', 'confidence': 0.0},
                 'key_info': {},
                 'consistency_score': 0.0,
+                'is_property_related': False,
+                'summary': 'Empty document',
+                'has_signatures': False,
+                'has_dates': False,
+                'verification_score': 0.0
             }
 
+        # Use a more sophisticated model for document classification
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
 
+        # Enhanced document types with more specific categories
+        doc_types = [
+            "property deed", "sales agreement", "mortgage document",
+            "property tax record", "title document", "khata certificate",
+            "encumbrance certificate", "lease agreement", "rental agreement",
+            "property registration document", "building permit", "other document"
+        ]
 
+        # Analyze document type with context
+        doc_context = f"{document_text[:1000]} property_type:{property_data.get('property_type', '')} location:{property_data.get('city', '')}"
+        doc_result = classifier(doc_context, doc_types)
+        doc_type = doc_result['labels'][0]
+        doc_confidence = doc_result['scores'][0]
+
+        # Enhanced authenticity check with multiple aspects
+        authenticity_aspects = [
+            "authentic legal document",
+            "questionable document",
+            "forged document",
+            "template document",
+            "official document"
+        ]
+        authenticity_result = classifier(document_text[:1000], authenticity_aspects)
+        authenticity = "likely authentic" if authenticity_result['labels'][0] == "authentic legal document" else "questionable"
+        authenticity_confidence = authenticity_result['scores'][0]
+
+        # Extract key information using NLP
         key_info = extract_document_key_info(document_text)
 
+        # Enhanced consistency check
         consistency_score = check_document_consistency(document_text, property_data)
 
+        # Property relation check with context
+        property_context = f"{document_text[:1000]} property:{property_data.get('property_name', '')} type:{property_data.get('property_type', '')}"
+        is_property_related = check_if_property_related(property_context)['is_related']
 
+        # Generate summary using BART
+        summary = summarize_text(document_text[:2000])
+
+        # Enhanced signature and date detection
+        has_signatures = bool(re.search(r'(?:sign|signature|signed|witness|notary|authorized).{0,50}(?:by|of|for)', document_text.lower()))
+        has_dates = bool(re.search(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}|\d{4}[/-]\d{1,2}[/-]\d{1,2}', document_text))
+
+        # Calculate verification score with weighted components
+        verification_weights = {
+            'doc_type': 0.3,
+            'authenticity': 0.3,
+            'consistency': 0.2,
+            'property_relation': 0.1,
+            'signatures_dates': 0.1
+        }
+
+        verification_score = (
+            doc_confidence * verification_weights['doc_type'] +
+            authenticity_confidence * verification_weights['authenticity'] +
+            consistency_score * verification_weights['consistency'] +
+            float(is_property_related) * verification_weights['property_relation'] +
+            float(has_signatures and has_dates) * verification_weights['signatures_dates']
+        )
+
         return {
+            'document_type': {'classification': doc_type, 'confidence': float(doc_confidence)},
+            'authenticity': {'assessment': authenticity, 'confidence': float(authenticity_confidence)},
             'key_info': key_info,
+            'consistency_score': float(consistency_score),
+            'is_property_related': is_property_related,
             'summary': summary,
+            'has_signatures': has_signatures,
+            'has_dates': has_dates,
+            'verification_score': float(verification_score)
         }
     except Exception as e:
         logger.error(f"Error analyzing PDF content: {str(e)}")
         return {
+            'document_type': {'classification': 'unknown', 'confidence': 0.0},
+            'authenticity': {'assessment': 'could not verify', 'confidence': 0.0},
             'key_info': {},
             'consistency_score': 0.0,
+            'is_property_related': False,
+            'summary': 'Could not analyze document',
+            'has_signatures': False,
+            'has_dates': False,
+            'verification_score': 0.0,
+            'error': str(e)
         }
 
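Worked example of the verification_score blend above, with illustrative inputs:

    doc_confidence, authenticity_confidence = 0.8, 0.7
    consistency_score = 0.9
    is_property_related, has_signatures, has_dates = True, True, True
    verification_score = (0.8 * 0.3     # document type
                          + 0.7 * 0.3   # authenticity
                          + 0.9 * 0.2   # consistency
                          + 1.0 * 0.1   # property relation
                          + 1.0 * 0.1)  # signatures and dates
    # = 0.24 + 0.21 + 0.18 + 0.10 + 0.10 = 0.83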
 def check_document_consistency(document_text, property_data):
     try:
+        if not sentence_model:
+            logger.warning("Sentence model unavailable")
+            return 0.5
         property_text = ' '.join([
             property_data.get(key, '') for key in [
                 'property_name', 'property_type', 'address', 'city',
 
     """
 
     # Use BART for summary generation
+    summarizer = load_model("summarization", "facebook/bart-large-cnn")
 
     # Generate initial summary
     summary_result = summarizer(property_context, max_length=150, min_length=50, do_sample=False)
 
         logger.error(f"Error generating property summary: {str(e)}")
         return "Could not generate summary."
 
+def summarize_text(text):
+    try:
+        if not text or len(text.strip()) < 10:
+            return "No text to summarize."
+        summarizer = load_model("summarization", "facebook/bart-large-cnn")
+        input_length = len(text.split())
+        max_length = max(50, min(150, input_length // 2))
+        min_length = max(20, input_length // 4)
+        summary = summarizer(text[:2000], max_length=max_length, min_length=min_length, do_sample=False)
+        return summary[0]['summary_text']
+    except Exception as e:
+        logger.error(f"Error summarizing text: {str(e)}")
+        return text[:200] + "..." if len(text) > 200 else text
+
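The rewritten summarize_text scales the summary length to the input instead of using fixed bounds. For a 300-word document:

    input_length = 300
    max_length = max(50, min(150, input_length // 2))  # -> 150
    min_length = max(20, input_length // 4)            # -> 75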
 def classify_fraud(property_details, description):
     """
     Classify the risk of fraud in a property listing using zero-shot classification.
     ]
 
     # Perform zero-shot classification
+    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
     result = classifier(text_to_analyze, risk_categories, multi_label=True)
 
     # Process classification results
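Note the inconsistency this hunk introduces: classify_fraud builds a fresh pipeline on every call, while every other call site in this commit goes through the lru_cache'd helper, which would avoid reloading the model:

    classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")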
 
 def generate_trust_score(text, image_analysis, pdf_analysis):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         aspects = [
             "complete information provided",
             "verified location",
 
 def generate_suggestions(text, data=None):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
 
         # Create comprehensive context for analysis
         suggestion_context = text
         if data:
             suggestion_context += f"""
             Additional Context:
+            Property Type: {data.get('property_type', '')}
             Location: {data.get('city', '')}, {data.get('state', '')}
             Size: {data.get('sq_ft', '')} sq.ft.
             Year Built: {data.get('year_built', '')}
  'quality_metrics': {}
1391
  }
1392
 
1393
+ classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
1394
 
1395
  # Enhanced quality categories with more specific indicators
1396
  quality_categories = [
 
1934
 
1935
  def analyze_location(data):
1936
  try:
1937
+ classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
1938
+
1939
+ # Create a detailed location text for analysis
1940
+ location_text = ' '.join(filter(None, [
1941
+ data['address'], data['city'], data['state'], data['country'],
1942
+ data['zip'], f"Lat: {data['latitude']}", f"Long: {data['longitude']}",
1943
+ data['nearby_landmarks']
1944
+ ]))
1945
+
1946
+ # Classify location completeness
1947
+ categories = ["complete", "partial", "minimal", "missing"]
1948
+ result = classifier(location_text, categories)
1949
+
1950
+ # Verify location quality
1951
+ location_quality = "unknown"
1952
+ if data['city'] and data['state']:
1953
+ for attempt in range(3):
1954
+ try:
1955
+ location = geocoder.geocode(f"{data['city']}, {data['state']}, India")
1956
+ if location:
1957
+ location_quality = "verified"
1958
+ break
1959
+ time.sleep(1)
1960
+ except:
1961
+ time.sleep(1)
1962
+ else:
1963
+ location_quality = "unverified"
1964
+
1965
+ # Check coordinates
1966
+ coord_check = "missing"
1967
+ if data['latitude'] and data['longitude']:
1968
+ try:
1969
+ lat, lng = float(data['latitude']), float(data['longitude'])
1970
+ if 6.5 <= lat <= 37.5 and 68.0 <= lng <= 97.5:
1971
+ coord_check = "in_india"
1972
+ # Further validate coordinates against known Indian cities
1973
+ if any(city in data['city'].lower() for city in ["mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune"]):
1974
+ coord_check = "in_metro_city"
1975
+ else:
1976
+ coord_check = "outside_india"
1977
+ except:
1978
+ coord_check = "invalid"
1979
+
1980
+ # Calculate location completeness with weighted scoring
1981
+ completeness = calculate_location_completeness(data)
1982
+
1983
+ # Analyze landmarks
1984
+ landmarks_analysis = {
1985
+ 'provided': bool(data['nearby_landmarks']),
1986
+ 'count': len(data['nearby_landmarks'].split(',')) if data['nearby_landmarks'] else 0,
1987
+ 'types': []
1988
+ }
1989
+
1990
+ if data['nearby_landmarks']:
1991
+ landmark_types = {
1992
+ 'transport': ['station', 'metro', 'bus', 'railway', 'airport'],
1993
+ 'education': ['school', 'college', 'university', 'institute'],
1994
+ 'healthcare': ['hospital', 'clinic', 'medical'],
1995
+ 'shopping': ['mall', 'market', 'shop', 'store'],
1996
+ 'entertainment': ['park', 'garden', 'theater', 'cinema'],
1997
+ 'business': ['office', 'business', 'corporate']
1998
+ }
1999
+
2000
+ landmarks = data['nearby_landmarks'].lower().split(',')
2001
+ for landmark in landmarks:
2002
+ for type_name, keywords in landmark_types.items():
2003
+ if any(keyword in landmark for keyword in keywords):
2004
+ if type_name not in landmarks_analysis['types']:
2005
+ landmarks_analysis['types'].append(type_name)
2006
+
2007
+ # Determine location assessment
2008
+ assessment = "complete" if completeness >= 80 else "partial" if completeness >= 50 else "minimal"
2009
+
2010
+ # Add city tier information
2011
+ city_tier = "unknown"
2012
+ if data['city']:
2013
+ city_lower = data['city'].lower()
2014
+ if any(city in city_lower for city in ["mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune"]):
2015
+ city_tier = "metro"
2016
+ elif any(city in city_lower for city in ["ahmedabad", "jaipur", "surat", "lucknow", "kanpur", "nagpur", "indore", "thane", "bhopal", "visakhapatnam"]):
2017
+ city_tier = "tier2"
2018
+ else:
2019
+ city_tier = "tier3"
2020
 
2021
  return {
2022
+ 'assessment': assessment,
2023
+ 'confidence': float(result['scores'][0]),
2024
+ 'coordinates_check': coord_check,
2025
+ 'landmarks_analysis': landmarks_analysis,
2026
+ 'completeness_score': completeness,
2027
+ 'location_quality': location_quality,
2028
+ 'city_tier': city_tier,
2029
+ 'formatted_address': f"{data['address']}, {data['city']}, {data['state']}, India - {data['zip']}",
2030
+ 'verification_status': "verified" if location_quality == "verified" and coord_check in ["in_india", "in_metro_city"] else "unverified"
2031
  }
2032
  except Exception as e:
2033
  logger.error(f"Error analyzing location: {str(e)}")
2034
+ return {
2035
+ 'assessment': 'error',
2036
+ 'confidence': 0.0,
2037
+ 'coordinates_check': 'error',
2038
+ 'landmarks_analysis': {'provided': False, 'count': 0, 'types': []},
2039
+ 'completeness_score': 0,
2040
+ 'location_quality': 'error',
2041
+ 'city_tier': 'unknown',
2042
+ 'formatted_address': '',
2043
+ 'verification_status': 'error'
2044
+ }
2045
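For reference, a minimal call sketch (illustrative only, not part of the commit; the sample values are invented but the keys match what this function reads):

    sample = {
        'address': '12 MG Road', 'city': 'Pune', 'state': 'Maharashtra',
        'country': 'India', 'zip': '411001',
        'latitude': '18.52', 'longitude': '73.85',
        'nearby_landmarks': 'Pune Railway Station, Aga Khan Palace'
    }
    report = analyze_location(sample)
    # With these inputs, city_tier resolves to 'metro' and coordinates_check to
    # 'in_metro_city', since (18.52, 73.85) falls inside the India bounding box
    # and 'pune' appears in the metro list.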
 
  def calculate_location_completeness(data):
+     # Define weights for different fields
+     weights = {
+         'address': 0.25,
+         'city': 0.20,
+         'state': 0.15,
+         'country': 0.05,
+         'zip': 0.10,
+         'latitude': 0.10,
+         'longitude': 0.10,
+         'nearby_landmarks': 0.05
+     }
+
+     # Calculate weighted score
+     score = 0
+     for field, weight in weights.items():
+         if data[field]:
+             score += weight
+
+     return int(score * 100)
+
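A worked example of the weighting (illustrative): a record with address, city, state and zip filled but no coordinates or landmarks accumulates 0.25 + 0.20 + 0.15 + 0.10 = 0.70, so the function returns int(0.70 * 100) = 70, which analyze_location then maps to the "partial" band (50-79).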
+ def analyze_price(data):
2068
  try:
2069
+ price_str = data['market_value'].replace('$', '').replace(',', '').strip()
2070
+ price = float(price_str) if price_str else 0
2071
+ sq_ft = float(re.sub(r'[^\d.]', '', data['sq_ft'])) if data['sq_ft'] else 0
2072
+ price_per_sqft = price / sq_ft if sq_ft else 0
2073
+
2074
+ if not price:
2075
+ return {
2076
+ 'assessment': 'no price',
2077
+ 'confidence': 0.0,
2078
+ 'price': 0,
2079
+ 'formatted_price': '₹0',
2080
+ 'price_per_sqft': 0,
2081
+ 'formatted_price_per_sqft': '₹0',
2082
+ 'price_range': 'unknown',
2083
+ 'location_price_assessment': 'cannot assess',
2084
+ 'has_price': False,
2085
+ 'market_trends': {},
2086
+ 'price_factors': {},
2087
+ 'risk_indicators': []
2088
+ }
2089
+
2090
+ # Use a more sophisticated model for price analysis
2091
+ classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
2092
+
2093
+ # Create a detailed context for price analysis
2094
+ price_context = f"""
2095
+ Property Type: {data.get('property_type', '')}
2096
+ Location: {data.get('city', '')}, {data.get('state', '')}
2097
+ Size: {sq_ft} sq.ft.
2098
+ Price: ₹{price:,.2f}
2099
+ Price per sq.ft.: ₹{price_per_sqft:,.2f}
2100
+ Property Status: {data.get('status', '')}
2101
+ Year Built: {data.get('year_built', '')}
2102
+ Bedrooms: {data.get('bedrooms', '')}
2103
+ Bathrooms: {data.get('bathrooms', '')}
2104
+ Amenities: {data.get('amenities', '')}
2105
+ """
2106
+
2107
+ # Enhanced price categories with more specific indicators
2108
+ price_categories = [
2109
+ "reasonable market price",
2110
+ "suspiciously low price",
2111
+ "suspiciously high price",
2112
+ "average market price",
2113
+ "luxury property price",
2114
+ "budget property price",
2115
+ "premium property price",
2116
+ "mid-range property price",
2117
+ "overpriced for location",
2118
+ "underpriced for location",
2119
+ "price matches amenities",
2120
+ "price matches property age",
2121
+ "price matches location value",
2122
+ "price matches property condition",
2123
+ "price matches market trends"
2124
+ ]
2125
+
2126
+ # Analyze price with multiple aspects
2127
+ price_result = classifier(price_context, price_categories, multi_label=True)
2128
+
2129
+ # Get top classifications with enhanced confidence calculation
2130
+ top_classifications = []
2131
+ for label, score in zip(price_result['labels'][:5], price_result['scores'][:5]):
2132
+ if score > 0.25: # Lower threshold for better sensitivity
2133
+ top_classifications.append({
2134
+ 'classification': label,
2135
+ 'confidence': float(score)
2136
+ })
2137
+
2138
+ # Determine price range based on AI classification and market data
2139
+ price_range = 'unknown'
2140
+ if top_classifications:
2141
+ primary_class = top_classifications[0]['classification']
2142
+ if 'luxury' in primary_class:
2143
+ price_range = 'luxury'
2144
+ elif 'premium' in primary_class:
2145
+ price_range = 'premium'
2146
+ elif 'mid-range' in primary_class:
2147
+ price_range = 'mid_range'
2148
+ elif 'budget' in primary_class:
2149
+ price_range = 'budget'
2150
+
2151
+ # Enhanced location-specific price assessment
2152
+ location_assessment = "unknown"
2153
+ market_trends = {}
2154
+ if data.get('city') and price_per_sqft:
2155
+ city_lower = data['city'].lower()
2156
+ metro_cities = ["mumbai", "delhi", "bangalore", "hyderabad", "chennai", "kolkata", "pune"]
2157
+
2158
+ # Define price ranges for different city tiers
2159
+ if any(city in city_lower for city in metro_cities):
2160
+ market_trends = {
2161
+ 'city_tier': 'metro',
2162
+ 'avg_price_range': {
2163
+ 'min': 5000,
2164
+ 'max': 30000,
2165
+ 'trend': 'stable'
2166
+ },
2167
+ 'price_per_sqft': {
2168
+ 'current': price_per_sqft,
2169
+ 'market_avg': 15000,
2170
+ 'deviation': abs(price_per_sqft - 15000) / 15000 * 100
2171
+ }
2172
+ }
2173
+ location_assessment = (
2174
+ "reasonable" if 5000 <= price_per_sqft <= 30000 else
2175
+ "suspiciously low" if price_per_sqft < 5000 else
2176
+ "suspiciously high"
2177
+ )
2178
+ else:
2179
+ market_trends = {
2180
+ 'city_tier': 'non-metro',
2181
+ 'avg_price_range': {
2182
+ 'min': 1500,
2183
+ 'max': 15000,
2184
+ 'trend': 'stable'
2185
+ },
2186
+ 'price_per_sqft': {
2187
+ 'current': price_per_sqft,
2188
+ 'market_avg': 7500,
2189
+ 'deviation': abs(price_per_sqft - 7500) / 7500 * 100
2190
+ }
2191
+ }
2192
+ location_assessment = (
2193
+ "reasonable" if 1500 <= price_per_sqft <= 15000 else
2194
+ "suspiciously low" if price_per_sqft < 1500 else
2195
+ "suspiciously high"
2196
+ )
2197
+
2198
+ # Enhanced price analysis factors
2199
+ price_factors = {}
2200
+ risk_indicators = []
2201
+
2202
+ # Property age factor
2203
+ try:
2204
+ year_built = int(data.get('year_built', 0))
2205
+ current_year = datetime.now().year
2206
+ property_age = current_year - year_built
2207
+
2208
+ if property_age > 0:
2209
+ depreciation_factor = max(0.5, 1 - (property_age * 0.01)) # 1% depreciation per year, min 50%
2210
+ price_factors['age_factor'] = {
2211
+ 'property_age': property_age,
2212
+ 'depreciation_factor': depreciation_factor,
2213
+ 'impact': 'high' if property_age > 30 else 'medium' if property_age > 15 else 'low'
2214
+ }
2215
+ except:
2216
+ price_factors['age_factor'] = {'error': 'Invalid year built'}
2217
+
2218
+ # Size factor
2219
+ if sq_ft > 0:
2220
+ size_factor = {
2221
+ 'size': sq_ft,
2222
+ 'price_per_sqft': price_per_sqft,
2223
+ 'efficiency': 'high' if 800 <= sq_ft <= 2000 else 'medium' if 500 <= sq_ft <= 3000 else 'low'
2224
+ }
2225
+ price_factors['size_factor'] = size_factor
2226
+
2227
+ # Add risk indicators based on size
2228
+ if sq_ft < 300:
2229
+ risk_indicators.append('Unusually small property size')
2230
+ elif sq_ft > 10000:
2231
+ risk_indicators.append('Unusually large property size')
2232
+
2233
+ # Amenities factor
2234
+ if data.get('amenities'):
2235
+ amenities_list = [a.strip() for a in data['amenities'].split(',')]
2236
+ amenities_score = min(1.0, len(amenities_list) * 0.1) # 10% per amenity, max 100%
2237
+ price_factors['amenities_factor'] = {
2238
+ 'count': len(amenities_list),
2239
+ 'score': amenities_score,
2240
+ 'impact': 'high' if amenities_score > 0.7 else 'medium' if amenities_score > 0.4 else 'low'
2241
+ }
2242
+
2243
+ # Calculate overall confidence with weighted factors
2244
+ confidence_weights = {
2245
+ 'primary_classification': 0.3,
2246
+ 'location_assessment': 0.25,
2247
+ 'age_factor': 0.2,
2248
+ 'size_factor': 0.15,
2249
+ 'amenities_factor': 0.1
2250
  }
2251
 
2252
+ confidence_scores = []
 
 
 
 
2253
 
2254
+ # Primary classification confidence
2255
+ if top_classifications:
2256
+ confidence_scores.append(price_result['scores'][0] * confidence_weights['primary_classification'])
2257
+
2258
+ # Location assessment confidence
2259
+ location_confidence = 0.8 if location_assessment == "reasonable" else 0.4
2260
+ confidence_scores.append(location_confidence * confidence_weights['location_assessment'])
 
 
 
 
2261
 
2262
+ # Age factor confidence
2263
+ if 'age_factor' in price_factors and 'depreciation_factor' in price_factors['age_factor']:
2264
+ age_confidence = price_factors['age_factor']['depreciation_factor']
2265
+ confidence_scores.append(age_confidence * confidence_weights['age_factor'])
2266
 
2267
+ # Size factor confidence
2268
+ if 'size_factor' in price_factors:
2269
+ size_confidence = 0.8 if price_factors['size_factor']['efficiency'] == 'high' else 0.6
2270
+ confidence_scores.append(size_confidence * confidence_weights['size_factor'])
2271
 
2272
+ # Amenities factor confidence
2273
+ if 'amenities_factor' in price_factors:
2274
+ amenities_confidence = price_factors['amenities_factor']['score']
2275
+ confidence_scores.append(amenities_confidence * confidence_weights['amenities_factor'])
2276
+
2277
+ overall_confidence = sum(confidence_scores) / sum(confidence_weights.values())
2278
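To make the aggregation concrete (illustrative numbers): with a top classification score of 0.60, a "reasonable" location (0.8), a ten-year-old property (depreciation_factor 0.9), 'high' size efficiency (0.8) and an amenities score of 0.5, the weighted sum is 0.60*0.3 + 0.8*0.25 + 0.9*0.2 + 0.8*0.15 + 0.5*0.1 = 0.73; dividing by the total weight of 1.0 leaves overall_confidence at 0.73.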
 
          return {
+             'assessment': top_classifications[0]['classification'] if top_classifications else 'could not classify',
+             'confidence': float(overall_confidence),
              'price': price,
+             'formatted_price': f"₹{price:,.0f}",
              'price_per_sqft': price_per_sqft,
+             'formatted_price_per_sqft': f"₹{price_per_sqft:,.2f}",
+             'price_range': price_range,
+             'location_price_assessment': location_assessment,
+             'has_price': True,
+             'market_trends': market_trends,
+             'price_factors': price_factors,
+             'risk_indicators': risk_indicators,
+             'top_classifications': top_classifications
          }
      except Exception as e:
          logger.error(f"Error analyzing price: {str(e)}")
+         return {
+             'assessment': 'error',
+             'confidence': 0.0,
+             'price': 0,
+             'formatted_price': '₹0',
+             'price_per_sqft': 0,
+             'formatted_price_per_sqft': '₹0',
+             'price_range': 'unknown',
+             'location_price_assessment': 'error',
+             'has_price': False,
+             'market_trends': {},
+             'price_factors': {},
+             'risk_indicators': [],
+             'top_classifications': []
+         }
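A quick call sketch (illustrative; the property values are made up):

    report = analyze_price({'market_value': '7500000', 'sq_ft': '1000',
                            'city': 'Pune', 'state': 'Maharashtra',
                            'year_built': '2015', 'amenities': 'lift, parking'})
    # price_per_sqft works out to 7500.0, inside the metro band (5000-30000),
    # so location_price_assessment comes back "reasonable".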
 
  def analyze_legal_details(legal_text):
      try:
+         if not legal_text or len(legal_text.strip()) < 5:
+             return {
+                 'assessment': 'insufficient',
                  'confidence': 0.0,
+                 'summary': 'No legal details provided',
                  'completeness_score': 0,
+                 'potential_issues': False,
+                 'legal_metrics': {},
+                 'reasoning': 'No legal details provided for analysis',
                  'top_classifications': []
              }

+         classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")

          # Enhanced legal categories with more specific indicators
          categories = [
 
          })

          # Generate summary using BART
+         summary = summarize_text(legal_text[:1000])

          # Calculate legal metrics with weighted scoring
          legal_metrics = {
 
              (1 - legal_metrics['risk_level']) * 0.2
          ))

+         return {
+             'assessment': top_classifications[0]['classification'] if top_classifications else 'could not assess',
+             'confidence': float(overall_confidence),
+             'summary': summary,
+             'completeness_score': int(completeness_score),
+             'potential_issues': potential_issues,
+             'legal_metrics': legal_metrics,
+             'reasoning': '. '.join(reasoning_parts),
+             'top_classifications': top_classifications
+         }
      except Exception as e:
          logger.error(f"Error analyzing legal details: {str(e)}")
          return {
+             'assessment': 'could not assess',
              'confidence': 0.0,
+             'summary': 'Error analyzing legal details',
              'completeness_score': 0,
+             'potential_issues': False,
+             'legal_metrics': {},
+             'reasoning': 'Technical error occurred during analysis',
              'top_classifications': []
          }
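A call sketch for reference (illustrative input; the category list and the metric computation sit in the collapsed hunks above):

    result = analyze_legal_details("Clear title deed, RERA registered, no pending litigation.")
    # result carries 'assessment', 'confidence', 'summary', 'completeness_score',
    # 'potential_issues', 'legal_metrics', 'reasoning' and 'top_classifications'.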
 
 
  def check_if_property_related(text):
      try:
+         classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
          result = classifier(text[:1000], ["property-related", "non-property-related"])
          is_related = result['labels'][0] == "property-related"
          return {

              'confidence': 0.0
          }

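A minimal usage sketch (illustrative; the keys of the returned dict sit in the collapsed hunk above):

    verdict = check_if_property_related("2BHK flat for sale near Hitech City, Hyderabad")
    # The returned dict is built from is_related plus the classifier's
    # confidence score for the winning label.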
  if __name__ == '__main__':
      # Run Flask app
      app.run(host='0.0.0.0', port=8000, debug=True, use_reloader=False)