sksameermujahid committed on
Commit f558e11 · verified · 1 Parent(s): 4340445

Update newapp.py

Files changed (1):
  1. newapp.py +610 -612

newapp.py CHANGED
@@ -1,7 +1,7 @@
 from flask import Flask, render_template, request, jsonify
 from flask_cors import CORS
 import torch
-from transformers import pipeline, CLIPProcessor, CLIPModel, AutoModelForSequenceClassification, AutoTokenizer
 import base64
 import io
 import re
@@ -23,309 +23,68 @@ import logging
 from functools import lru_cache
 import time
 import math
 import threading
 import gc
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
-from werkzeug.serving import WSGIRequestHandler

-# Initialize Flask app
 app = Flask(__name__)
-CORS(app)

 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
         logging.StreamHandler()
     ]
 )
 logger = logging.getLogger(__name__)

-# Global variables for models with lazy loading and memory management
-models = {
-    'clip_processor': None,
-    'clip_model': None,
-    'sentence_model': None,
-    'nlp': None,
-    'geocoder': None,
-    'summarizer': None,
-    'classifier': None
-}
-
-def load_model_safely(model_name, loader_func, *args, **kwargs):
-    """Safely load a model with memory management"""
-    try:
-        # Clear memory before loading
-        gc.collect()
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-        # Remove memory-efficient settings for non-transformers models
-        if model_name in ['geocoder', 'nlp']:
-            kwargs.pop('torch_dtype', None)
-            kwargs.pop('low_cpu_mem_usage', None)
-        else:
-            # Add memory-efficient settings for transformers models
-            if 'device' in kwargs:
-                kwargs['device'] = -1  # Force CPU usage
-            if 'model' in kwargs:
-                # Use smallest available models
-                if kwargs['model'] == 'facebook/bart-large-cnn':
-                    kwargs['model'] = 'sshleifer/distilbart-cnn-6-6'  # Even smaller CNN model
-                elif kwargs['model'] == 'facebook/bart-large-mnli':
-                    kwargs['model'] = 'cross-encoder/nli-distilroberta-base'  # Smaller NLI model
-
-        model = loader_func(*args, **kwargs)
-
-        # Set model to evaluation mode and disable gradients
-        if hasattr(model, 'eval'):
-            model.eval()
-        if hasattr(model, 'requires_grad_'):
-            model.requires_grad_(False)
-
-        logger.info(f"Successfully loaded model: {model_name}")
-        return model
-    except Exception as e:
-        logger.error(f"Error loading model {model_name}: {str(e)}")
-        return None
-
-def cleanup_models():
-    """Clean up model resources"""
-    try:
-        for model_name, model in models.items():
-            if model is not None:
-                if hasattr(model, 'cpu'):
-                    model.cpu()
-                if hasattr(model, 'to'):
-                    model.to('cpu')
-                del model
-                models[model_name] = None
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-    except Exception as e:
-        logger.error(f"Error in cleanup_models: {str(e)}")
-
-@app.before_request
-def before_request():
-    """Clear memory before each request"""
-    try:
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-    except Exception as e:
-        logger.error(f"Error in before_request: {str(e)}")
-
-@app.after_request
-def after_request(response):
-    """Clean up memory after each request"""
-    try:
-        cleanup_models()
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-    except Exception as e:
-        logger.error(f"Error in after_request: {str(e)}")
-    return response
-
-def get_model(model_name):
-    """Lazy loading of models with optimized configurations"""
-    if models[model_name] is None:
-        try:
-            if model_name == 'clip_processor':
-                models[model_name] = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-            elif model_name == 'clip_model':
-                models[model_name] = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-            elif model_name == 'sentence_model':
-                models[model_name] = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-            elif model_name == 'nlp':
-                models[model_name] = spacy.load('en_core_web_sm')
-            elif model_name == 'geocoder':
-                models[model_name] = Nominatim(user_agent="indian_property_verifier", timeout=10)
-            elif model_name == 'summarizer':
-                models[model_name] = load_model(
-                    "summarization",
-                    "sshleifer/distilbart-cnn-6-6"
-                )
-            elif model_name == 'classifier':
-                models[model_name] = load_model(
-                    "zero-shot-classification",
-                    "facebook/bart-large-mnli"
-                )
-
-            # Set model to evaluation mode and disable gradients
-            if hasattr(models[model_name], 'eval'):
-                models[model_name].eval()
-            if hasattr(models[model_name], 'requires_grad_'):
-                models[model_name].requires_grad_(False)
-
-            logger.info(f"Successfully loaded model: {model_name}")
-        except Exception as e:
-            logger.error(f"Error loading model {model_name}: {str(e)}")
-            models[model_name] = None
-
-    return models[model_name]
-
-def process_batch(items, batch_size=4):
-    """Process items in batches to manage memory"""
-    for i in range(0, len(items), batch_size):
-        batch = items[i:i + batch_size]
-        yield batch
-        # Clean up after each batch
-        gc.collect()
-        torch.cuda.empty_cache() if torch.cuda.is_available() else None
-
-def analyze_images(images, batch_size=4):
-    """Analyze images in batches"""
-    results = []
-    for batch in process_batch(images, batch_size):
-        batch_results = []
-        for img in batch:
-            try:
-                analysis = analyze_image(img)
-                batch_results.append(analysis)
-            except Exception as e:
-                logger.error(f"Error analyzing image: {str(e)}")
-                batch_results.append({'error': str(e)})
-        results.extend(batch_results)
-    return results
-
-def analyze_documents(documents, batch_size=2):
-    """Analyze documents in batches"""
-    results = []
-    for batch in process_batch(documents, batch_size):
-        batch_results = []
-        for doc in batch:
-            try:
-                analysis = analyze_pdf_content(doc)
-                batch_results.append(analysis)
-            except Exception as e:
-                logger.error(f"Error analyzing document: {str(e)}")
-                batch_results.append({'error': str(e)})
-        results.extend(batch_results)
-    return results
-
-def initialize_models():
-    """Initialize all models with proper error handling"""
-    try:
-        # Initialize geocoder
-        models['geocoder'] = Nominatim(user_agent="indian_property_verifier", timeout=10)
-        logger.info("Geocoder initialized successfully")
-    except Exception as e:
-        logger.error(f"Error initializing geocoder: {str(e)}")
-
-    try:
-        # Initialize CLIP model
-        models['clip_processor'] = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
-        models['clip_model'] = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
-        logger.info("CLIP model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading CLIP model: {str(e)}")
-
-    try:
-        # Initialize sentence transformer
-        models['sentence_model'] = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
-        logger.info("Sentence transformer loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading sentence transformer: {str(e)}")
-
-    try:
-        # Initialize spaCy
-        models['nlp'] = spacy.load('en_core_web_sm')
-        logger.info("spaCy model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading spaCy model: {str(e)}")
-
-    try:
-        # Initialize summarizer
-        models['summarizer'] = pipeline(
-            "summarization",
-            model="sshleifer/distilbart-cnn-6-6",
-            device=-1,
-            max_length=100,
-            min_length=20
-        )
-        logger.info("Summarizer model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading summarizer model: {str(e)}")
-
-    try:
-        # Initialize classifier
-        models['classifier'] = pipeline(
-            "zero-shot-classification",
-            model="cross-encoder/nli-distilroberta-base",
-            device=-1
-        )
-        logger.info("Classifier model loaded successfully")
-    except Exception as e:
-        logger.error(f"Error loading classifier model: {str(e)}")
-
-# Cache models
-@lru_cache(maxsize=10)
-def load_model(task, model_name):
-    try:
-        logger.info(f"Loading model: {model_name} for task: {task}")
-
-        # Use smaller models for CPU
-        if task == "zero-shot-classification":
-            from transformers import AutoModelForSequenceClassification, AutoTokenizer
-            # Use a smaller model for zero-shot classification
-            model = AutoModelForSequenceClassification.from_pretrained(
-                "facebook/bart-large-mnli",
-                torch_dtype=torch.float32,
-                device_map="auto",
-                low_cpu_mem_usage=True
-            )
-            tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")
-            return pipeline(
-                task,
-                model=model,
-                tokenizer=tokenizer,
-                device=-1,
-                torch_dtype=torch.float32
-            )
-        elif task == "summarization":
-            # Use a smaller model for summarization
-            return pipeline(
-                task,
-                model="sshleifer/distilbart-cnn-6-6",
-                device=-1,
-                torch_dtype=torch.float32,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-        elif task == "text-classification":
-            # Use a smaller model for text classification
-            return pipeline(
-                task,
-                model="distilbert-base-uncased-finetuned-sst-2-english",
-                device=-1,
-                torch_dtype=torch.float32,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-        else:
-            # Default pipeline for other tasks with memory optimization
-            return pipeline(
-                task,
-                model=model_name,
-                device=-1,
-                torch_dtype=torch.float32,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-    except Exception as e:
-        logger.error(f"Error loading model {model_name}: {str(e)}")
-        # Try simpler configuration
-        try:
-            logger.info("Attempting simpler configuration...")
-            return pipeline(
-                task,
-                model=model_name,
-                device=-1,
-                model_kwargs={"low_cpu_mem_usage": True}
-            )
-        except Exception as e2:
-            logger.error(f"Simpler configuration also failed: {str(e2)}")
-            raise
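The removed `get_model` helper gives lazy, on-demand loading but shares the `models` dict across requests without synchronization. A minimal sketch of the same idea with double-checked locking (the helper and loader names are illustrative, not part of the commit):

```python
import threading

_models: dict = {}
_lock = threading.Lock()

def get_model_lazy(name: str, loader):
    """Return a cached model, loading it at most once even under
    concurrent requests (double-checked locking)."""
    if name not in _models:
        with _lock:
            if name not in _models:  # re-check after acquiring the lock
                _models[name] = loader()
    return _models[name]

# Hypothetical usage with a transformers pipeline:
# from transformers import pipeline
# classifier = get_model_lazy(
#     "classifier",
#     lambda: pipeline("zero-shot-classification", device=-1),
# )
```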

 def make_json_serializable(obj):
     try:
@@ -365,17 +124,10 @@ def get_location():
             'message': 'Latitude and longitude are required'
         }), 400

-        if not models['geocoder']:
-            logger.error("Geocoder not initialized")
-            return jsonify({
-                'status': 'error',
-                'message': 'Service temporarily unavailable'
-            }), 503
-
         # Retry geocoding up to 3 times
         for attempt in range(3):
             try:
-                location = models['geocoder'].reverse((latitude, longitude), exactly_one=True)
                 if location:
                     address_components = location.raw.get('address', {})
                     return jsonify({
@@ -410,91 +162,80 @@ def get_location():
 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
-        # Initialize data dictionary
-        data = {
-            'property_name': '',
-            'property_type': '',
-            'status': '',
-            'description': '',
-            'address': '',
-            'city': '',
-            'state': '',
-            'country': 'India',
-            'zip': '',
-            'latitude': '',
-            'longitude': '',
-            'bedrooms': '',
-            'bathrooms': '',
-            'total_rooms': '',
-            'year_built': '',
-            'parking': '',
-            'sq_ft': '',
-            'market_value': '',
-            'amenities': '',
-            'nearby_landmarks': '',
-            'legal_details': ''
-        }
-
-        # Try to get data from JSON first
-        if request.is_json:
-            json_data = request.get_json()
-            if json_data:
-                for key in data:
-                    if key in json_data:
-                        data[key] = str(json_data[key]).strip()
-        # Then try form data
-        elif request.form:
-            for key in data:
-                if key in request.form:
-                    data[key] = request.form.get(key, '').strip()
-
-        # Check if we have at least some basic data
-        if not any(data.values()):
-            logger.warning("No data provided in request")
             return jsonify({
                 'error': 'No data provided',
                 'status': 'error'
             }), 400

-        # Initialize results with default values
-        results = {
-            'report_id': str(uuid.uuid4()),
-            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-            'summary': None,
-            'fraud_classification': None,
-            'trust_score': {'score': 0, 'reasoning': 'Insufficient data'},
-            'suggestions': [],
-            'quality_assessment': None,
-            'address_verification': None,
-            'cross_validation': None,
-            'location_analysis': None,
-            'price_analysis': None,
-            'legal_analysis': None,
-            'document_analysis': {'pdf_count': 0, 'pdf_texts': [], 'pdf_analysis': []},
-            'image_analysis': {'image_count': 0, 'image_analysis': []},
-            'specs_verification': None,
-            'market_analysis': None,
-            'images': [],
-            'missing_data': []
         }

-        # Process images in batches if available
         if 'images' in request.files:
             image_files = request.files.getlist('images')
-            results['image_analysis']['image_analysis'] = analyze_images(image_files)
-            results['image_analysis']['image_count'] = len(results['image_analysis']['image_analysis'])
-        else:
-            results['missing_data'].append("No images provided")
-
-        # Process documents in batches if available
         if 'documents' in request.files:
             pdf_files = request.files.getlist('documents')
-            results['document_analysis']['pdf_analysis'] = analyze_documents(pdf_files)
-            results['document_analysis']['pdf_count'] = len(results['document_analysis']['pdf_analysis'])
-        else:
-            results['missing_data'].append("No documents provided")

-        # Generate consolidated text from available data
         consolidated_text = f"""
         Property Name: {data['property_name']}
         Property Type: {data['property_type']}
@@ -512,92 +253,73 @@ def verify_property():
         Legal Details: {data['legal_details']}
         """

-        # Perform analysis based on available data
         try:
-            # Generate property summary if basic data is available
-            if data['property_name'] and data['property_type']:
-                summarizer = get_model('summarizer')
-                if summarizer:
-                    results['summary'] = generate_property_summary(data)
                 else:
-                    results['missing_data'].append("Summary generation unavailable")
-
-            # Perform fraud classification if enough data is available
-            if len(consolidated_text.strip()) > 50:
-                classifier = get_model('classifier')
-                if classifier:
-                    results['fraud_classification'] = classify_fraud(consolidated_text, data, classifier)
             else:
-                    results['missing_data'].append("Fraud classification unavailable")
-
-            # Generate trust score based on available data
-            if len(consolidated_text.strip()) > 50:
-                results['trust_score'] = generate_trust_score(consolidated_text, [], [])
-
-            # Generate suggestions based on available data
-            if len(consolidated_text.strip()) > 50:
-                results['suggestions'] = generate_suggestions(consolidated_text, data)
-
-            # Address verification if location data is available
-            if data['address'] and data['city'] and data['state']:
-                geocoder = get_model('geocoder')
-                if geocoder:
-                    results['address_verification'] = verify_address(data)
-                else:
-                    results['missing_data'].append("Address verification unavailable")
-
-            # Cross validation if property details are available
-            if data['bedrooms'] or data['bathrooms'] or data['sq_ft'] or data['market_value']:
-                results['cross_validation'] = perform_cross_validation(data)
-
-            # Location analysis if location data is available
-            if data['latitude'] and data['longitude']:
-                geocoder = get_model('geocoder')
-                if geocoder:
-                    results['location_analysis'] = analyze_location(data)
-                else:
-                    results['missing_data'].append("Location analysis unavailable")
-
-            # Price analysis if price data is available
-            if data['market_value']:
-                classifier = get_model('classifier')
-                if classifier:
-                    results['price_analysis'] = analyze_price(data)
-                else:
-                    results['missing_data'].append("Price analysis unavailable")
-
-            # Legal analysis if legal details are available
-            if data['legal_details']:
-                classifier = get_model('classifier')
-                if classifier:
-                    results['legal_analysis'] = analyze_legal_details(data['legal_details'])
-                else:
-                    results['missing_data'].append("Legal analysis unavailable")
-
-            # Property specs verification if specs are available
-            if data['bedrooms'] or data['bathrooms'] or data['sq_ft'] or data['market_value']:
-                results['specs_verification'] = verify_property_specs(data)
-
-            # Market analysis if price and property details are available
-            if data['market_value'] and (data['sq_ft'] or data['property_type']):
-                classifier = get_model('classifier')
-                if classifier:
-                    results['market_analysis'] = analyze_market_value(data)
-                else:
-                    results['missing_data'].append("Market analysis unavailable")
-
         except Exception as e:
-            logger.error(f"Error during analysis: {str(e)}")
-            results['error'] = f"Error during analysis: {str(e)}"

-        # Clean up after processing
-        cleanup_models()

         return jsonify(make_json_serializable(results))

     except Exception as e:
         logger.error(f"Error in verify_property: {str(e)}")
-        cleanup_models()
         return jsonify({
             'error': 'Server error occurred. Please try again later.',
             'status': 'error',
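The removed handler accepted either a JSON body or form fields and normalised every value to a stripped string; the new code (shown further down) reads form fields only. A compact sketch of the dual-source pattern being dropped (`extract_payload` is an illustrative name):

```python
from flask import request

def extract_payload(fields):
    """Merge a JSON body and form fields into one dict of stripped
    strings, preferring JSON when both are present."""
    json_body = request.get_json(silent=True) or {}  # silent=True: no 400 on bad JSON
    return {
        key: str(json_body.get(key, request.form.get(key, ''))).strip()
        for key in fields
    }
```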
@@ -618,9 +340,9 @@ def extract_pdf_text(pdf_file):

 def analyze_image(image):
     try:
-        if models['clip_processor'] and models['clip_model']:
             img_rgb = image.convert('RGB')
-            inputs = models['clip_processor'](
                 text=[
                     "real estate property interior",
                     "real estate property exterior",
@@ -632,7 +354,7 @@ def analyze_image(image):
                 return_tensors="pt",
                 padding=True
             )
-            outputs = models['clip_model'](**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1).detach().numpy()[0]
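Both sides of this hunk run the same CLIP zero-shot scoring; only where the processor and model live changes (entries in the `models` dict vs. module-level globals). For reference, the scoring step in a self-contained form:

```python
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

def clip_scores(image: Image.Image, labels: list) -> dict:
    """Probability of each free-text label for the given image."""
    inputs = processor(text=labels, images=image.convert("RGB"),
                       return_tensors="pt", padding=True)
    with torch.no_grad():                      # inference only
        logits = model(**inputs).logits_per_image
    probs = logits.softmax(dim=1)[0].tolist()  # one row per image
    return dict(zip(labels, probs))
```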
@@ -799,7 +521,7 @@ def analyze_pdf_content(document_text, property_data):

 def check_document_consistency(document_text, property_data):
     try:
-        if not models['sentence_model']:
             logger.warning("Sentence model unavailable")
             return 0.5
         property_text = ' '.join([
@@ -808,8 +530,8 @@ def check_document_consistency(document_text, property_data):
             'state', 'market_value', 'sq_ft', 'bedrooms'
         ]
         ])
-        property_embedding = models['sentence_model'].encode(property_text)
-        document_embedding = models['sentence_model'].encode(document_text[:1000])
         similarity = util.cos_sim(property_embedding, document_embedding)[0][0].item()
         return max(0.0, min(1.0, float(similarity)))
     except Exception as e:
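check_document_consistency is unchanged apart from the model lookup: it embeds the joined listing fields and the first 1000 characters of the document, then clamps their cosine similarity to [0, 1]. Standalone:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")

def consistency_score(property_text: str, document_text: str) -> float:
    """Clamped cosine similarity between listing fields and document text."""
    prop_emb = model.encode(property_text)
    doc_emb = model.encode(document_text[:1000])  # cap document length
    sim = util.cos_sim(prop_emb, doc_emb)[0][0].item()
    return max(0.0, min(1.0, float(sim)))
```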
@@ -838,48 +560,75 @@ def extract_document_key_info(text):
         return {}

 def generate_property_summary(data):
-    """Generate a summary of the property listing"""
     try:
-        # Get the summarizer model
-        summarizer = get_model('summarizer')
-        if summarizer is None:
-            logger.error("Summarizer model not available")
-            return "Unable to generate summary due to model unavailability"
-
         # Create a detailed context for summary generation
-        context = f"""
-        Property Name: {data.get('property_name', 'Not specified')}
-        Property Type: {data.get('property_type', 'Not specified')}
-        Location: {data.get('address', 'Not specified')}, {data.get('city', 'Not specified')}, {data.get('state', 'Not specified')}
-        Price: {data.get('market_value', 'Not specified')}
-        Area: {data.get('sq_ft', 'Not specified')} sq.ft.
-        Bedrooms: {data.get('bedrooms', 'Not specified')}
-        Bathrooms: {data.get('bathrooms', 'Not specified')}
-        Year Built: {data.get('year_built', 'Not specified')}
-        Parking: {data.get('parking', 'Not specified')} spaces
-        Amenities: {data.get('amenities', 'Not specified')}
-        Nearby Landmarks: {data.get('nearby_landmarks', 'Not specified')}
         """
-
         # Generate initial summary
-        summary = summarizer(context, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
-
-        # Enhance the summary with key features
-        enhanced_summary = f"Property Summary: {summary}"

-        # Add key features if available
         key_features = []
         if data.get('amenities'):
             key_features.append(f"Amenities: {data['amenities']}")
-        if data.get('nearby_landmarks'):
-            key_features.append(f"Nearby: {data['nearby_landmarks']}")
         if key_features:
-            enhanced_summary += f"\nKey Features: {', '.join(key_features)}"

         return enhanced_summary
     except Exception as e:
         logger.error(f"Error generating property summary: {str(e)}")
-        return "Error generating property summary"

 def summarize_text(text):
     try:
@@ -895,8 +644,9 @@ def summarize_text(text):
         logger.error(f"Error summarizing text: {str(e)}")
         return text[:200] + "..." if len(text) > 200 else text

-def classify_fraud(text, data, classifier):
     try:
         categories = [
             "suspicious pricing pattern",
             "potentially fraudulent listing",
@@ -916,8 +666,8 @@ def classify_fraud(text, data, classifier):
         - Name: {data.get('property_name', 'Not provided')}
         - Type: {data.get('property_type', 'Not provided')}
         - Status: {data.get('property_status', 'Not provided')}
-        - Price: {data.get('market_value', 'Not provided')}
-        - Square Footage: {data.get('sq_ft', 'Not provided')}
         - Year Built: {data.get('year_built', 'Not provided')}
         - Location: {data.get('address', 'Not provided')}
         - Description: {text}
@@ -939,7 +689,7 @@ def classify_fraud(text, data, classifier):
                 high_risk.append((label, score))
             elif score > 0.5:
                 medium_risk.append((label, score))
-            else:
                 low_risk.append((label, score))

         # Calculate alert score with adjusted weights
@@ -961,12 +711,97 @@ def classify_fraud(text, data, classifier):
         else:
             alert_level = 'minimal'

         return {
             'alert_level': alert_level,
             'alert_score': alert_score,
             'high_risk': high_risk,
             'medium_risk': medium_risk,
-            'low_risk': low_risk
         }
     except Exception as e:
         logger.error(f"Error in fraud classification: {str(e)}")
@@ -975,7 +810,8 @@ def classify_fraud(text, data, classifier):
         'alert_score': 1.0,
         'high_risk': [],
         'medium_risk': [],
-        'low_risk': []
     }

 def generate_trust_score(text, image_analysis, pdf_analysis):
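In classify_fraud, each zero-shot label is bucketed by score on both sides of the diff: above 0.7 is high risk, above 0.5 medium, everything else low (the hunk above restores the `else:` that routes the remainder to `low_risk`). The bucketing in isolation:

```python
def bucket_risks(labels, scores):
    """Split zero-shot labels into high/medium/low-risk buckets
    using the 0.7 and 0.5 thresholds from classify_fraud."""
    high_risk, medium_risk, low_risk = [], [], []
    for label, score in zip(labels, scores):
        if score > 0.7:
            high_risk.append((label, score))
        elif score > 0.5:
            medium_risk.append((label, score))
        else:
            low_risk.append((label, score))
    return high_risk, medium_risk, low_risk

# bucket_risks(["urgent sale", "accurate listing"], [0.82, 0.31])
# -> ([("urgent sale", 0.82)], [], [("accurate listing", 0.31)])
```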
@@ -1094,114 +930,244 @@ def generate_trust_score(text, image_analysis, pdf_analysis):
         logger.error(f"Error generating trust score: {str(e)}")
         return 20, "Could not assess trust."

-def generate_suggestions(consolidated_text, data):
-    """Generate property improvement suggestions based on analysis."""
     try:
-        # Get the classifier model
-        classifier = get_model('classifier')
-        if classifier is None:
-            return []
-
-        # Prepare context for suggestions
-        suggestion_context = f"""
-        Property Analysis Context:
-        {consolidated_text}
-        Property Type: {data.get('property_type', 'unknown')}
-        Location: {data.get('city', 'unknown')}, {data.get('state', 'unknown')}
-        Price: ₹{data.get('market_value', 'unknown')}
-        Area: {data.get('sq_ft', 'unknown')} sq.ft.
-        Year Built: {data.get('year_built', 'unknown')}
-        """
-
-        # Define base suggestions with weights
         base_suggestions = {
-            'Presentation': {
-                'categories': ['excellent', 'good', 'fair', 'poor'],
-                'weight': 1.0,
                 'improvements': {
-                    'fair': [
-                        'Consider professional staging',
-                        'Improve lighting in key areas',
-                        'Declutter living spaces'
                     ],
-                    'poor': [
-                        'Hire professional photographer',
-                        'Deep clean the property',
-                        'Repaint walls in neutral colors'
                     ]
                 }
             },
-            'Maintenance': {
-                'categories': ['well-maintained', 'needs-minor-work', 'needs-major-work'],
-                'weight': 0.9,
                 'improvements': {
-                    'needs-minor-work': [
-                        'Fix minor plumbing issues',
-                        'Repair cracked tiles',
-                        'Replace worn-out fixtures'
                     ],
-                    'needs-major-work': [
-                        'Address structural issues',
-                        'Update electrical system',
-                        'Replace HVAC system'
                     ]
                 }
             },
-            'Market Appeal': {
-                'categories': ['high', 'medium', 'low'],
-                'weight': 0.8,
                 'improvements': {
-                    'medium': [
-                        'Enhance curb appeal',
-                        'Update kitchen appliances',
-                        'Add modern amenities'
                     ],
-                    'low': [
-                        'Consider price adjustment',
-                        'Improve property description',
-                        'Highlight unique features'
                     ]
                 }
             }
         }
-
         suggestions = []
         confidence_scores = []
-
-        # Analyze each aspect
         for aspect, config in base_suggestions.items():
             try:
-                # Analyze each aspect with context
-                result = classifier(suggestion_context[:1000], config['categories'])
-
-                # Get the most relevant category
-                top_category = result['labels'][0]
-                confidence = float(result['scores'][0])
-
-                # If the category indicates improvement needed (confidence < 0.6)
-                if confidence < 0.6 and top_category in config.get('improvements', {}):
-                    weighted_confidence = confidence * config['weight']
-                    for improvement in config['improvements'][top_category]:
                         suggestions.append({
-                            'aspect': aspect,
-                            'category': top_category,
-                            'suggestion': improvement,
-                            'confidence': weighted_confidence
                         })
-                        confidence_scores.append(weighted_confidence)
-            except Exception as e:
-                logger.error(f"Error analyzing aspect {aspect}: {str(e)}")
-                continue
-
-        # Sort suggestions by confidence
-        suggestions.sort(key=lambda x: x['confidence'], reverse=True)

-        # Return top 10 suggestions
-        return suggestions[:10]
-
     except Exception as e:
         logger.error(f"Error generating suggestions: {str(e)}")
-        return []

 def assess_text_quality(text):
     try:
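On both sides, generate_suggestions classifies the listing context against per-aspect category labels, and an aspect only yields suggestions when the top category's confidence falls below 0.6; that confidence is then multiplied by the aspect weight and used for ranking. The ranking step alone, under those assumptions:

```python
def rank_suggestions(candidates, top_n=10):
    """candidates: iterable of (aspect, category, suggestion, confidence, weight).
    Apply the aspect weight and return the top-N by weighted confidence."""
    ranked = [
        {'aspect': a, 'category': c, 'suggestion': s, 'confidence': conf * weight}
        for a, c, s, conf, weight in candidates
    ]
    ranked.sort(key=lambda item: item['confidence'], reverse=True)
    return ranked[:top_n]
```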
@@ -1331,8 +1297,7 @@ def verify_address(data):
             'verification_score': 0.0
         }

-        # Verify pincode
-        if data.get('zip'):
             try:
                 response = requests.get(f"https://api.postalpincode.in/pincode/{data['zip']}", timeout=5)
                 if response.status_code == 200:
@@ -1354,28 +1319,14 @@ def verify_address(data):
                 logger.error(f"Pincode API error: {str(e)}")
                 address_results['issues'].append("Pincode validation failed")

-        # Verify address with geocoding
-        full_address = ', '.join(filter(None, [
-            data.get('address', ''),
-            data.get('city', ''),
-            data.get('state', ''),
-            data.get('country', ''),
-            data.get('zip', '')
-        ]))
-
-        if full_address:
             try:
-                # Initialize geocoder if not already done
-                if not models['geocoder']:
-                    models['geocoder'] = Nominatim(user_agent="property_verifier", timeout=10)
-
-                location = models['geocoder'].geocode(full_address)
                 if location:
                     address_results['address_exists'] = True
                     address_results['confidence'] = 0.9
-
-                    # Verify coordinates if provided
-                    if data.get('latitude') and data.get('longitude'):
                         try:
                             provided_coords = (float(data['latitude']), float(data['longitude']))
                             geocoded_coords = (location.latitude, location.longitude)
@@ -1384,16 +1335,16 @@ def verify_address(data):
                             address_results['coordinates_match'] = dist < 1.0
                             if not address_results['coordinates_match']:
                                 address_results['issues'].append(f"Coordinates {dist:.2f}km off")
-                        except Exception as e:
-                            logger.error(f"Coordinate verification error: {str(e)}")
                             address_results['issues'].append("Invalid coordinates")
-                else:
-                    address_results['issues'].append("Address not found in geocoding service")
             except Exception as e:
-                logger.error(f"Geocoding error: {str(e)}")
-                address_results['issues'].append("Address geocoding failed")

-        # Calculate verification score
         verification_points = (
             address_results['address_exists'] * 0.4 +
             address_results['pincode_valid'] * 0.3 +
@@ -1404,16 +1355,9 @@ def verify_address(data):

         return address_results
     except Exception as e:
-        logger.error(f"Error in verify_address: {str(e)}")
-        return {
-            'address_exists': False,
-            'pincode_valid': False,
-            'city_state_match': False,
-            'coordinates_match': False,
-            'confidence': 0.0,
-            'issues': [f"Error during verification: {str(e)}"],
-            'verification_score': 0.0
-        }

 def perform_cross_validation(data):
     try:
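verify_address flags listings whose supplied coordinates sit more than 1 km from the geocoded address. The distance (`dist` in the hunk) is presumably geopy's geodesic distance in kilometres, along these lines:

```python
from geopy.distance import geodesic

def coords_match(provided, geocoded, max_km=1.0):
    """True when two (lat, lon) pairs are within max_km kilometres."""
    return geodesic(provided, geocoded).km < max_km

# coords_match((17.3850, 78.4867), (17.3900, 78.4900))  # nearby -> True
```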
@@ -1793,7 +1737,7 @@ def analyze_location(data):
     if data['city'] and data['state']:
         for attempt in range(3):
             try:
-                location = models['geocoder'].geocode(f"{data['city']}, {data['state']}, India")
                 if location:
                     location_quality = "verified"
                     break
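Both geocoding call sites wrap Nominatim in a three-attempt retry loop; the new code sleeps one second between attempts. A generic version of that pattern:

```python
import time
from geopy.exc import GeocoderServiceError
from geopy.geocoders import Nominatim

geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)

def geocode_with_retry(query, attempts=3, pause=1.0):
    """Return the first successful geocode result, or None after
    `attempts` tries with a short pause between them."""
    for _ in range(attempts):
        try:
            location = geocoder.geocode(query)
            if location:
                return location
        except GeocoderServiceError:
            pass  # transient failure; retry after the pause
        time.sleep(pause)
    return None
```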
@@ -2547,10 +2491,64 @@ def check_if_property_related(text):
         'confidence': 0.0
     }

 if __name__ == '__main__':
-    # Initialize models before starting the server
-    initialize_models()
-    app.run(host='0.0.0.0', port=7860, debug=False)
-else:
-    # Initialize models when running with gunicorn
-    initialize_models()
 from flask import Flask, render_template, request, jsonify
 from flask_cors import CORS
 import torch
+from transformers import pipeline, CLIPProcessor, CLIPModel, BitsAndBytesConfig
 import base64
 import io
 import re

 from functools import lru_cache
 import time
 import math
+from pyngrok import ngrok
 import threading
 import gc
+import psutil

 app = Flask(__name__)
+CORS(app)  # Enable CORS for frontend

 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
     handlers=[
+        logging.FileHandler('app.log'),
         logging.StreamHandler()
     ]
 )
 logger = logging.getLogger(__name__)

+# Initialize geocoder
+geocoder = Nominatim(user_agent="indian_property_verifier", timeout=10)
+
+# Add memory monitoring function
+def monitor_memory():
+    while True:
+        process = psutil.Process()
+        memory_info = process.memory_info()
+        logger.info(f"Memory usage: {memory_info.rss / 1024 / 1024:.2f} MB")
+        if memory_info.rss > 2 * 1024 * 1024 * 1024:  # If using more than 2GB
+            logger.warning("High memory usage detected, clearing cache")
+            clear_model_cache()
+        time.sleep(300)  # Check every 5 minutes
+
+# Start memory monitoring in a separate thread
+memory_monitor_thread = threading.Thread(target=monitor_memory, daemon=True)
+memory_monitor_thread.start()
+
+# Initialize CLIP model
+try:
+    clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+    clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+    has_clip_model = True
+    logger.info("CLIP model loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading CLIP model: {str(e)}")
+    has_clip_model = False
+
+# Initialize sentence transformer
+try:
+    sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+    logger.info("Sentence transformer loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading sentence transformer: {str(e)}")
+    sentence_model = None
+
+# Initialize spaCy
+try:
+    nlp = spacy.load('en_core_web_md')
+    logger.info("spaCy model loaded successfully")
+except Exception as e:
+    logger.error(f"Error loading spaCy model: {str(e)}")
+    nlp = None
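The new top-level setup starts a daemon thread that samples the process RSS with psutil every five minutes and calls clear_model_cache above 2 GB (the function is defined later in the file, which is fine because the thread only resolves the name at call time). The measurement itself reduces to:

```python
import psutil

def rss_mb() -> float:
    """Resident set size of the current process, in megabytes."""
    return psutil.Process().memory_info().rss / 1024 / 1024
```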

 def make_json_serializable(obj):
     try:
         'message': 'Latitude and longitude are required'
     }), 400

     # Retry geocoding up to 3 times
     for attempt in range(3):
         try:
+            location = geocoder.reverse((latitude, longitude), exactly_one=True)
             if location:
                 address_components = location.raw.get('address', {})
                 return jsonify({

 @app.route('/verify', methods=['POST'])
 def verify_property():
     try:
+        if not request.form and not request.files:
+            logger.warning("No form data or files provided")
             return jsonify({
                 'error': 'No data provided',
                 'status': 'error'
             }), 400

+        data = {
+            'property_name': request.form.get('property_name', '').strip(),
+            'property_type': request.form.get('property_type', '').strip(),
+            'status': request.form.get('status', '').strip(),
+            'description': request.form.get('description', '').strip(),
+            'address': request.form.get('address', '').strip(),
+            'city': request.form.get('city', '').strip(),
+            'state': request.form.get('state', '').strip(),
+            'country': request.form.get('country', 'India').strip(),
+            'zip': request.form.get('zip', '').strip(),
+            'latitude': request.form.get('latitude', '').strip(),
+            'longitude': request.form.get('longitude', '').strip(),
+            'bedrooms': request.form.get('bedrooms', '').strip(),
+            'bathrooms': request.form.get('bathrooms', '').strip(),
+            'total_rooms': request.form.get('total_rooms', '').strip(),
+            'year_built': request.form.get('year_built', '').strip(),
+            'parking': request.form.get('parking', '').strip(),
+            'sq_ft': request.form.get('sq_ft', '').strip(),
+            'market_value': request.form.get('market_value', '').strip(),
+            'amenities': request.form.get('amenities', '').strip(),
+            'nearby_landmarks': request.form.get('nearby_landmarks', '').strip(),
+            'legal_details': request.form.get('legal_details', '').strip()
         }

+        required_fields = ['property_name', 'property_type', 'address', 'city', 'state']
+        missing_fields = [field for field in required_fields if not data[field]]
+        if missing_fields:
+            logger.warning(f"Missing required fields: {', '.join(missing_fields)}")
+            return jsonify({
+                'error': f"Missing required fields: {', '.join(missing_fields)}",
+                'status': 'error'
+            }), 400
+
+        images = []
+        image_analysis = []
         if 'images' in request.files:
             image_files = request.files.getlist('images')
+            for img_file in image_files:
+                if img_file.filename and img_file.filename.lower().endswith(('.jpg', '.jpeg', '.png')):
+                    try:
+                        img = Image.open(img_file)
+                        buffered = io.BytesIO()
+                        img.save(buffered, format="JPEG")
+                        img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+                        images.append(img_str)
+                        image_analysis.append(analyze_image(img))
+                    except Exception as e:
+                        logger.error(f"Error processing image {img_file.filename}: {str(e)}")
+                        image_analysis.append({'error': str(e), 'is_property_related': False})
+
+        pdf_texts = []
+        pdf_analysis = []
         if 'documents' in request.files:
             pdf_files = request.files.getlist('documents')
+            for pdf_file in pdf_files:
+                if pdf_file.filename and pdf_file.filename.lower().endswith('.pdf'):
+                    try:
+                        pdf_text = extract_pdf_text(pdf_file)
+                        pdf_texts.append({
+                            'filename': pdf_file.filename,
+                            'text': pdf_text
+                        })
+                        pdf_analysis.append(analyze_pdf_content(pdf_text, data))
+                    except Exception as e:
+                        logger.error(f"Error processing PDF {pdf_file.filename}: {str(e)}")
+                        pdf_analysis.append({'error': str(e)})

         consolidated_text = f"""
         Property Name: {data['property_name']}
         Property Type: {data['property_type']}

         Legal Details: {data['legal_details']}
         """

         try:
+            description = data['description']
+            if description and len(description) > 10:
+                text_language = detect(description)
+                if text_language != 'en':
+                    translated_description = GoogleTranslator(source=text_language, target='en').translate(description)
+                    data['description_translated'] = translated_description
                 else:
+                    data['description_translated'] = description
             else:
+                data['description_translated'] = description
         except Exception as e:
+            logger.error(f"Error in language detection/translation: {str(e)}")
+            data['description_translated'] = data['description']
+
+        summary = generate_property_summary(data)
+        fraud_classification = classify_fraud(consolidated_text, data)
+        trust_score, trust_reasoning = generate_trust_score(consolidated_text, image_analysis, pdf_analysis)
+        suggestions = generate_suggestions(consolidated_text, data)
+        quality_assessment = assess_text_quality(data['description_translated'])
+        address_verification = verify_address(data)
+        cross_validation = perform_cross_validation(data)
+        location_analysis = analyze_location(data)
+        price_analysis = analyze_price(data)
+        legal_analysis = analyze_legal_details(data['legal_details'])
+        specs_verification = verify_property_specs(data)
+        market_analysis = analyze_market_value(data)
+
+        document_analysis = {
+            'pdf_count': len(pdf_texts),
+            'pdf_texts': pdf_texts,
+            'pdf_analysis': pdf_analysis
+        }
+        image_results = {
+            'image_count': len(images),
+            'image_analysis': image_analysis
+        }
+
+        report_id = str(uuid.uuid4())

+        results = {
+            'report_id': report_id,
+            'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            'summary': summary,
+            'fraud_classification': fraud_classification,
+            'trust_score': {
+                'score': trust_score,
+                'reasoning': trust_reasoning
+            },
+            'suggestions': suggestions,
+            'quality_assessment': quality_assessment,
+            'address_verification': address_verification,
+            'cross_validation': cross_validation,
+            'location_analysis': location_analysis,
+            'price_analysis': price_analysis,
+            'legal_analysis': legal_analysis,
+            'document_analysis': document_analysis,
+            'image_analysis': image_results,
+            'specs_verification': specs_verification,
+            'market_analysis': market_analysis,
+            'images': images
+        }

         return jsonify(make_json_serializable(results))

     except Exception as e:
         logger.error(f"Error in verify_property: {str(e)}")
         return jsonify({
             'error': 'Server error occurred. Please try again later.',
             'status': 'error',
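The new handler detects the description's language and translates non-English text before quality assessment; `detect` and `GoogleTranslator` presumably come from langdetect and deep-translator. The logic, condensed into one helper:

```python
from deep_translator import GoogleTranslator
from langdetect import detect

def to_english(text: str) -> str:
    """Translate `text` to English unless it is too short or already English."""
    if not text or len(text) <= 10:
        return text
    lang = detect(text)  # e.g. 'hi' for Hindi
    if lang == 'en':
        return text
    return GoogleTranslator(source=lang, target='en').translate(text)
```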

 def analyze_image(image):
     try:
+        if has_clip_model:
             img_rgb = image.convert('RGB')
+            inputs = clip_processor(
                 text=[
                     "real estate property interior",
                     "real estate property exterior",

                 return_tensors="pt",
                 padding=True
             )
+            outputs = clip_model(**inputs)
             logits_per_image = outputs.logits_per_image
             probs = logits_per_image.softmax(dim=1).detach().numpy()[0]

 def check_document_consistency(document_text, property_data):
     try:
+        if not sentence_model:
             logger.warning("Sentence model unavailable")
             return 0.5
         property_text = ' '.join([

             'state', 'market_value', 'sq_ft', 'bedrooms'
         ]
         ])
+        property_embedding = sentence_model.encode(property_text)
+        document_embedding = sentence_model.encode(document_text[:1000])
         similarity = util.cos_sim(property_embedding, document_embedding)[0][0].item()
         return max(0.0, min(1.0, float(similarity)))
     except Exception as e:

         return {}

 def generate_property_summary(data):
     try:
         # Create a detailed context for summary generation
+        property_context = f"""
+        Property Name: {data.get('property_name', '')}
+        Type: {data.get('property_type', '')}
+        Status: {data.get('status', '')}
+        Location: {data.get('address', '')}, {data.get('city', '')}, {data.get('state', '')}, {data.get('country', '')}
+        Size: {data.get('sq_ft', '')} sq. ft.
+        Price: {data.get('market_value', '0')}
+        Bedrooms: {data.get('bedrooms', '')}
+        Bathrooms: {data.get('bathrooms', '')}
+        Year Built: {data.get('year_built', '')}
+        Description: {data.get('description', '')}
         """
+
+        # Use BART for summary generation
+        summarizer = load_model("summarization", "facebook/bart-large-cnn")
+
         # Generate initial summary
+        summary_result = summarizer(property_context, max_length=150, min_length=50, do_sample=False)
+        initial_summary = summary_result[0]['summary_text']

+        # Enhance summary with key features
         key_features = []
+
+        # Add property type and status
+        if data.get('property_type') and data.get('status'):
+            key_features.append(f"{data['property_type']} is {data['status'].lower()}")
+
+        # Add location if available
+        location_parts = []
+        if data.get('city'):
+            location_parts.append(data['city'])
+        if data.get('state'):
+            location_parts.append(data['state'])
+        if location_parts:
+            key_features.append(f"Located in {', '.join(location_parts)}")
+
+        # Add size and price if available
+        if data.get('sq_ft'):
+            key_features.append(f"Spans {data['sq_ft']} sq. ft.")
+        if data.get('market_value'):
+            key_features.append(f"Valued at ₹{data['market_value']}")
+
+        # Add rooms information
+        rooms_info = []
+        if data.get('bedrooms'):
+            rooms_info.append(f"{data['bedrooms']} bedroom{'s' if data['bedrooms'] != '1' else ''}")
+        if data.get('bathrooms'):
+            rooms_info.append(f"{data['bathrooms']} bathroom{'s' if data['bathrooms'] != '1' else ''}")
+        if rooms_info:
+            key_features.append(f"Features {' and '.join(rooms_info)}")
+
+        # Add amenities if available
         if data.get('amenities'):
             key_features.append(f"Amenities: {data['amenities']}")
+
+        # Combine initial summary with key features
+        enhanced_summary = initial_summary
         if key_features:
+            enhanced_summary += " " + ". ".join(key_features) + "."
+
+        # Clean up the summary
+        enhanced_summary = enhanced_summary.replace("  ", " ").strip()

         return enhanced_summary
     except Exception as e:
         logger.error(f"Error generating property summary: {str(e)}")
+        return "Could not generate summary."

 def summarize_text(text):
     try:

         logger.error(f"Error summarizing text: {str(e)}")
         return text[:200] + "..." if len(text) > 200 else text

+def classify_fraud(text, data=None):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
         categories = [
             "suspicious pricing pattern",
             "potentially fraudulent listing",

         - Name: {data.get('property_name', 'Not provided')}
         - Type: {data.get('property_type', 'Not provided')}
         - Status: {data.get('property_status', 'Not provided')}
+        - Price: {data.get('market_value', 'Not provided')}
+        - Square Footage: {data.get('square_footage', 'Not provided')}
         - Year Built: {data.get('year_built', 'Not provided')}
         - Location: {data.get('address', 'Not provided')}
         - Description: {text}

                 high_risk.append((label, score))
             elif score > 0.5:
                 medium_risk.append((label, score))
+            else:
                 low_risk.append((label, score))

         # Calculate alert score with adjusted weights

         else:
             alert_level = 'minimal'

+        # Enhanced fraud indicators with more specific patterns
+        fraud_indicators = []
+
+        # Price-related patterns
+        price_patterns = [
+            (r'suspiciously low price', 0.8),
+            (r'unusually high price', 0.7),
+            (r'price too good to be true', 0.9),
+            (r'urgent sale', 0.6),
+            (r'must sell quickly', 0.7)
+        ]
+
+        # Location-related patterns
+        location_patterns = [
+            (r'location mismatch', 0.8),
+            (r'address inconsistency', 0.7),
+            (r'wrong neighborhood', 0.6),
+            (r'incorrect zip code', 0.7)
+        ]
+
+        # Document-related patterns
+        document_patterns = [
+            (r'missing documents', 0.8),
+            (r'unverified documents', 0.7),
+            (r'fake documents', 0.9),
+            (r'photoshopped documents', 0.8)
+        ]
+
+        # Urgency-related patterns
+        urgency_patterns = [
+            (r'act now', 0.6),
+            (r'limited time offer', 0.5),
+            (r'first come first served', 0.4),
+            (r'won\'t last long', 0.5)
+        ]
+
+        # Check all patterns
+        all_patterns = price_patterns + location_patterns + document_patterns + urgency_patterns
+        for pattern, weight in all_patterns:
+            if re.search(pattern, text.lower()):
+                fraud_indicators.append({
+                    'pattern': pattern,
+                    'weight': weight,
+                    'context': text[max(0, text.lower().find(pattern)-50):min(len(text), text.lower().find(pattern)+50)]
+                })
+
+        # Additional checks for data inconsistencies
+        if data:
+            # Check for suspiciously low price per square foot
+            try:
+                price = float(data.get('market_value', 0))
+                sqft = float(data.get('square_footage', 1))
+                price_per_sqft = price / sqft
+                if price_per_sqft < 50:  # Unusually low price per square foot
+                    fraud_indicators.append({
+                        'pattern': 'suspiciously low price per square foot',
+                        'weight': 0.8,
+                        'context': f'Price per square foot: ${price_per_sqft:.2f}'
+                    })
+            except (ValueError, ZeroDivisionError):
+                pass
+
+            # Check for impossible values
+            try:
+                year_built = int(data.get('year_built', 0))
+                if year_built < 1800 or year_built > 2024:
+                    fraud_indicators.append({
+                        'pattern': 'impossible year built',
+                        'weight': 0.9,
+                        'context': f'Year built: {year_built}'
+                    })
+            except ValueError:
+                pass
+
+            # Check for missing critical information
+            critical_fields = ['property_name', 'property_type', 'address', 'market_value', 'square_footage']
+            missing_fields = [field for field in critical_fields if not data.get(field)]
+            if missing_fields:
+                fraud_indicators.append({
+                    'pattern': 'missing critical information',
+                    'weight': 0.7,
+                    'context': f'Missing fields: {", ".join(missing_fields)}'
+                })
+
         return {
             'alert_level': alert_level,
             'alert_score': alert_score,
             'high_risk': high_risk,
             'medium_risk': medium_risk,
+            'low_risk': low_risk,
+            'fraud_indicators': fraud_indicators
         }
     except Exception as e:
         logger.error(f"Error in fraud classification: {str(e)}")

         'alert_score': 1.0,
         'high_risk': [],
         'medium_risk': [],
+        'low_risk': [],
+        'fraud_indicators': []
     }
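The new fraud_indicators pass calls `text.lower().find(pattern)` twice per hit to rebuild the context window. An equivalent sketch using the match span from a single `re.search`, which avoids the repeated scans:

```python
import re

def scan_patterns(text, patterns, window=50):
    """Return matched (pattern, weight) pairs with surrounding context.
    `patterns` is a list of (regex, weight) tuples."""
    hits = []
    lowered = text.lower()
    for pattern, weight in patterns:
        match = re.search(pattern, lowered)
        if match:
            start, end = match.span()
            hits.append({
                'pattern': pattern,
                'weight': weight,
                'context': text[max(0, start - window):min(len(text), end + window)],
            })
    return hits
```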

 def generate_trust_score(text, image_analysis, pdf_analysis):

         logger.error(f"Error generating trust score: {str(e)}")
         return 20, "Could not assess trust."

+def generate_suggestions(text, data=None):
     try:
+        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
+
+        # Create comprehensive context for analysis
+        suggestion_context = text
+        if data:
+            suggestion_context += f"""
+            Additional Context:
+            Property Type: {data.get('property_type', '')}
+            Location: {data.get('city', '')}, {data.get('state', '')}
+            Size: {data.get('sq_ft', '')} sq.ft.
+            Year Built: {data.get('year_built', '')}
+            """
+
+        # Enhanced suggestion categories based on property context
         base_suggestions = {
+            'documentation': {
+                'label': "add more documentation",
+                'categories': [
+                    "complete documentation provided",
+                    "missing essential documents",
+                    "incomplete paperwork",
+                    "documentation needs verification"
+                ],
+                'weight': 2.0,
+                'improvements': {
+                    'missing essential documents': [
+                        "Add property deed or title documents",
+                        "Include recent property tax records",
+                        "Attach property registration documents"
+                    ],
+                    'incomplete paperwork': [
+                        "Complete all required legal documents",
+                        "Add missing ownership proof",
+                        "Include property survey documents"
+                    ]
+                }
+            },
+            'details': {
+                'label': "enhance property details",
+                'categories': [
+                    "detailed property information",
+                    "basic information only",
+                    "missing key details",
+                    "comprehensive description"
+                ],
+                'weight': 1.8,
+                'improvements': {
+                    'basic information only': [
+                        "Add more details about property features",
+                        "Include information about recent renovations",
+                        "Describe unique selling points"
+                    ],
+                    'missing key details': [
+                        "Specify exact built-up area",
+                        "Add floor plan details",
+                        "Include maintenance costs"
+                    ]
+                }
+            },
+            'images': {
+                'label': "improve visual content",
+                'categories': [
+                    "high quality images provided",
+                    "poor image quality",
+                    "insufficient images",
+                    "missing key area photos"
+                ],
+                'weight': 1.5,
                 'improvements': {
+                    'poor image quality': [
+                        "Add high-resolution property photos",
+                        "Include better lighting in images",
+                        "Provide professional photography"
                     ],
+                    'insufficient images': [
+                        "Add more interior photos",
+                        "Include exterior and surrounding area images",
+                        "Add photos of amenities"
                     ]
                 }
             },
+            'pricing': {
+                'label': "pricing information",
+                'categories': [
+                    "detailed pricing breakdown",
+                    "basic price only",
+                    "missing price details",
+                    "unclear pricing terms"
+                ],
+                'weight': 1.7,
                 'improvements': {
+                    'basic price only': [
+                        "Add detailed price breakdown",
+                        "Include maintenance charges",
+                        "Specify additional costs"
                     ],
+                    'missing price details': [
+                        "Add price per square foot",
+                        "Include tax implications",
+                        "Specify payment terms"
                     ]
                 }
             },
+            'location': {
+                'label': "location details",
+                'categories': [
+                    "comprehensive location info",
+                    "basic location only",
+                    "missing location details",
+                    "unclear accessibility info"
+                ],
+                'weight': 1.6,
                 'improvements': {
+                    'basic location only': [
+                        "Add nearby landmarks and distances",
+                        "Include transportation options",
+                        "Specify neighborhood facilities"
                     ],
+                    'missing location details': [
+                        "Add exact GPS coordinates",
+                        "Include area development plans",
+                        "Specify distance to key facilities"
                     ]
                 }
             }
         }
+
         suggestions = []
         confidence_scores = []
+
         for aspect, config in base_suggestions.items():
+            # Analyze each aspect with context
+            result = classifier(suggestion_context[:1000], config['categories'])
+
+            # Get the most relevant category
+            top_category = result['labels'][0]
+            confidence = float(result['scores'][0])
+
+            # If the category indicates improvement needed (confidence < 0.6)
+            if confidence < 0.6 and top_category in config['improvements']:
+                weighted_confidence = confidence * config['weight']
+                for improvement in config['improvements'][top_category]:
+                    suggestions.append({
+                        'aspect': aspect,
+                        'category': top_category,
+                        'suggestion': improvement,
+                        'confidence': weighted_confidence
+                    })
+                    confidence_scores.append(weighted_confidence)
+
+        # Sort suggestions by confidence and priority
+        suggestions.sort(key=lambda x: x['confidence'], reverse=True)
+
+        # Property type specific suggestions
+        if data and data.get('property_type'):
+            property_type = data['property_type'].lower()
+            type_specific_suggestions = {
+                'residential': [
+                    "Add information about school districts",
+                    "Include details about neighborhood safety",
+                    "Specify parking arrangements"
+                ],
+                'commercial': [
+                    "Add foot traffic statistics",
+                    "Include zoning information",
+                    "Specify business licenses required"
+                ],
+                'industrial': [
+                    "Add power supply specifications",
+                    "Include environmental clearances",
+                    "Specify loading/unloading facilities"
+                ],
+                'land': [
+                    "Add soil testing reports",
+                    "Include development potential analysis",
+                    "Specify available utilities"
+                ]
+            }
+
+            for type_key, type_suggestions in type_specific_suggestions.items():
+                if type_key in property_type:
+                    for suggestion in type_suggestions:
+                        suggestions.append({
+                            'aspect': 'property_type_specific',
+                            'category': 'type_specific_requirements',
+                            'suggestion': suggestion,
+                            'confidence': 0.8  # High confidence for type-specific suggestions
+                        })
+
+        # Add market-based suggestions
+        if data and data.get('market_value'):
             try:
+                market_value = float(data['market_value'].replace('₹', '').replace(',', ''))
+                if market_value > 10000000:  # High-value property
+                    premium_suggestions = [
+                        "Add virtual tour of the property",
+                        "Include detailed investment analysis",
+                        "Provide historical price trends"
+                    ]
+                    for suggestion in premium_suggestions:
                         suggestions.append({
+                            'aspect': 'premium_property',
+                            'category': 'high_value_requirements',
+                            'suggestion': suggestion,
+                            'confidence': 0.9
                         })
+            except ValueError:
+                pass

+        # Calculate overall completeness score
+        completeness_score = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0
+        completeness_score = min(100, max(0, completeness_score * 100))
+
+        return {
+            'suggestions': suggestions[:10],  # Return top 10 suggestions
+            'completeness_score': completeness_score,
+            'priority_aspects': [s['aspect'] for s in suggestions[:3]],
+            'improvement_summary': f"Focus on improving {', '.join([s['aspect'] for s in suggestions[:3]])}",
+            'total_suggestions': len(suggestions)
+        }
     except Exception as e:
         logger.error(f"Error generating suggestions: {str(e)}")
+        return {
+            'suggestions': [
+                {
+                    'aspect': 'general',
+                    'category': 'basic_requirements',
+                    'suggestion': 'Please provide more property details',
+                    'confidence': 0.5
+                }
+            ],
+            'completeness_score': 0,
+            'priority_aspects': ['general'],
+            'improvement_summary': "Add basic property information",
+            'total_suggestions': 1
+        }
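generate_suggestions now returns a dict rather than a bare list, including a completeness score taken from the mean weighted confidence, scaled to 0-100 and clamped. In isolation:

```python
confidence_scores = [0.9, 0.5, 0.4]  # hypothetical weighted confidences
score = sum(confidence_scores) / len(confidence_scores) if confidence_scores else 0
score = min(100, max(0, score * 100))  # -> 60.0
```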

 def assess_text_quality(text):
     try:

             'verification_score': 0.0
         }

+        if data['zip']:
             try:
                 response = requests.get(f"https://api.postalpincode.in/pincode/{data['zip']}", timeout=5)
                 if response.status_code == 200:

                 logger.error(f"Pincode API error: {str(e)}")
                 address_results['issues'].append("Pincode validation failed")

+        full_address = ', '.join(filter(None, [data['address'], data['city'], data['state'], data['country'], data['zip']]))
+        for attempt in range(3):
             try:
+                location = geocoder.geocode(full_address)
                 if location:
                     address_results['address_exists'] = True
                     address_results['confidence'] = 0.9
+                    if data['latitude'] and data['longitude']:
                         try:
                             provided_coords = (float(data['latitude']), float(data['longitude']))
                             geocoded_coords = (location.latitude, location.longitude)

                             address_results['coordinates_match'] = dist < 1.0
                             if not address_results['coordinates_match']:
                                 address_results['issues'].append(f"Coordinates {dist:.2f}km off")
+                        except:
                             address_results['issues'].append("Invalid coordinates")
+                    break
+                time.sleep(1)
             except Exception as e:
+                logger.error(f"Geocoding error on attempt {attempt + 1}: {str(e)}")
+                time.sleep(1)
+        else:
+            address_results['issues'].append("Address geocoding failed")

         verification_points = (
             address_results['address_exists'] * 0.4 +
             address_results['pincode_valid'] * 0.3 +

         return address_results
     except Exception as e:
+        logger.error(f"Error verifying address: {str(e)}")
+        address_results['issues'].append(str(e))
+        return address_results

 def perform_cross_validation(data):
     try:

     if data['city'] and data['state']:
         for attempt in range(3):
             try:
+                location = geocoder.geocode(f"{data['city']}, {data['state']}, India")
                 if location:
                     location_quality = "verified"
                     break

         'confidence': 0.0
     }

+# Update the load_model function to use memory optimizations
+@lru_cache(maxsize=3)  # Limit cache size
+def load_model(task, model_name):
+    try:
+        logger.info(f"Loading model: {model_name} for task: {task}")
+
+        # Use smaller, more efficient models
+        if task == "zero-shot-classification":
+            # Use smaller model for zero-shot classification
+            model_name = "facebook/bart-large-mnli"  # ~1.6GB
+            return pipeline(task, model=model_name, device=-1)
+        elif task == "summarization":
+            # Use smaller model for summarization
+            model_name = "facebook/bart-large-cnn"  # ~1.6GB
+            return pipeline(task, model=model_name, device=-1)
+        elif task == "text-classification":
+            # Use very small model for text classification
+            model_name = "distilbert-base-uncased"  # ~260MB
+            return pipeline(task, model=model_name, device=-1)
+        elif task == "feature-extraction":
+            # Use small model for feature extraction
+            model_name = "sentence-transformers/all-MiniLM-L6-v2"  # ~80MB
+            return pipeline(task, model=model_name, device=-1)
+        else:
+            # Default to small model for unknown tasks
+            model_name = "distilbert-base-uncased"
+            return pipeline(task, model=model_name, device=-1)
+    except Exception as e:
+        logger.error(f"Error loading model {model_name}: {str(e)}")
+        raise
+
+# Add memory cleanup function
+def clear_model_cache():
+    """Clear model cache and free up memory"""
+    load_model.cache_clear()
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    logger.info("Model cache cleared and memory freed")

 if __name__ == '__main__':
+    # Set up ngrok
+    http_tunnel = ngrok.connect(5000)
+    print(f' * Public URL: {http_tunnel.public_url}')
+
+    # Run Flask app in a separate thread
+    def run_flask():
+        app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False)
+
+    flask_thread = threading.Thread(target=run_flask)
+    flask_thread.daemon = True
+    flask_thread.start()
+
+    try:
+        # Keep the main thread running
+        while True:
+            time.sleep(1)
+    except KeyboardInterrupt:
+        print(" * Shutting down server...")
+        ngrok.disconnect(http_tunnel.public_url)
+        ngrok.kill()