Spaces:
Running
on
Zero
Running
on
Zero
import random | |
import hashlib | |
import numpy as np | |
import sqlite3 | |
import re | |
import traceback | |
from typing import List, Dict, Tuple, Optional, Any | |
from dataclasses import dataclass | |
from sentence_transformers import SentenceTransformer | |
import torch | |
from sklearn.metrics.pairwise import cosine_similarity | |
from dog_database import get_dog_description | |
from breed_health_info import breed_health_info | |
from breed_noise_info import breed_noise_info | |
from scoring_calculation_system import UserPreferences, calculate_compatibility_score, UnifiedScoringSystem, calculate_unified_breed_scores | |
from query_understanding import QueryUnderstandingEngine, analyze_user_query | |
from constraint_manager import ConstraintManager, apply_breed_constraints | |
from multi_head_scorer import MultiHeadScorer, score_breed_candidates, BreedScore | |
from score_calibrator import ScoreCalibrator, calibrate_breed_scores | |
from config_manager import get_config_manager, get_standardized_breed_data | |
@dataclass
class BreedDescriptionVector:
    """Record pairing a breed's generated description with its embedding.

    The ``@dataclass`` decorator generates the keyword-accepting ``__init__``
    that ``_build_breed_vectors`` relies on when it creates these records.
    """

    # Breed identifier as stored in the breed list (e.g. 'Golden_Retriever').
    breed_name: str
    # Natural-language description text that was fed to the encoder.
    description_text: str
    # Vector produced by the sentence encoder for ``description_text``.
    embedding: np.ndarray
    # Selected raw breed attributes (size, exercise needs, temperament, ...).
    characteristics: Dict[str, Any]
class SemanticBreedRecommender: | |
""" | |
Enhanced SBERT-based semantic breed recommendation system | |
Provides multi-dimensional natural language understanding for dog breed recommendations | |
""" | |
def __init__(self): | |
"""Initialize the semantic recommender""" | |
self.model_name = 'all-MiniLM-L6-v2' # Efficient SBERT model | |
self.sbert_model = None | |
self._sbert_loading_attempted = False | |
self.breed_vectors = {} | |
self.breed_list = self._get_breed_list() | |
self.comparative_keywords = { | |
'most': 1.0, 'love': 1.0, 'prefer': 0.9, 'like': 0.8, | |
'then': 0.7, 'second': 0.7, 'followed': 0.6, | |
'third': 0.5, 'least': 0.3, 'dislike': 0.2 | |
} | |
# Defer SBERT model loading until needed in GPU context | |
# This prevents CUDA initialization issues in ZeroGPU environment | |
print("SemanticBreedRecommender initialized (SBERT loading deferred)") | |
# Initialize multi-head scorer with SBERT model if enhanced mode is enabled | |
# if self.sbert_model: | |
# self.multi_head_scorer = MultiHeadScorer(self.sbert_model) | |
# print("Multi-head scorer initialized with SBERT model") | |
def _get_breed_list(self) -> List[str]: | |
"""Get breed list from database""" | |
try: | |
conn = sqlite3.connect('animal_detector.db') | |
cursor = conn.cursor() | |
cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog") | |
breeds = [row[0] for row in cursor.fetchall()] | |
cursor.close() | |
conn.close() | |
return breeds | |
except Exception as e: | |
print(f"Error getting breed list: {str(e)}") | |
# Backup breed list for Google Colab environment | |
return ['Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', | |
'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier'] | |
def _initialize_model(self): | |
"""Initialize SBERT model with fallback - designed for ZeroGPU compatibility""" | |
if self.sbert_model is not None or self._sbert_loading_attempted: | |
return self.sbert_model | |
try: | |
print("Loading SBERT model in GPU context...") | |
# Try different model names if the primary one fails | |
model_options = ['all-MiniLM-L6-v2', 'all-mpnet-base-v2', 'all-MiniLM-L12-v2'] | |
for model_name in model_options: | |
try: | |
# Specify device explicitly to handle ZeroGPU environment | |
import torch | |
device = 'cuda' if torch.cuda.is_available() else 'cpu' | |
self.sbert_model = SentenceTransformer(model_name, device=device) | |
self.model_name = model_name | |
print(f"SBERT model {model_name} loaded successfully on {device}") | |
return self.sbert_model | |
except Exception as model_e: | |
print(f"Failed to load {model_name}: {str(model_e)}") | |
continue | |
# If all models fail | |
print("All SBERT models failed to load. Using basic text matching fallback.") | |
self.sbert_model = None | |
return None | |
except Exception as e: | |
print(f"Failed to initialize any SBERT model: {str(e)}") | |
print(traceback.format_exc()) | |
print("Will provide basic text-based recommendations without embeddings") | |
self.sbert_model = None | |
return None | |
finally: | |
self._sbert_loading_attempted = True | |
def _create_breed_description(self, breed: str) -> str: | |
"""Create comprehensive natural language description for breed with all key characteristics""" | |
try: | |
# Get all information sources | |
breed_info = get_dog_description(breed) or {} | |
health_info = breed_health_info.get(breed, {}) if breed_health_info else {} | |
noise_info = breed_noise_info.get(breed, {}) if breed_noise_info else {} | |
breed_display_name = breed.replace('_', ' ') | |
description_parts = [] | |
# 1. Basic size and physical characteristics | |
size = breed_info.get('Size', 'medium').lower() | |
description_parts.append(f"{breed_display_name} is a {size} sized dog breed") | |
# 2. Temperament and personality (critical for matching) | |
temperament = breed_info.get('Temperament', '') | |
if temperament: | |
description_parts.append(f"with a {temperament.lower()} temperament") | |
# 3. Exercise and activity level (critical for apartment living) | |
exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower() | |
if 'high' in exercise_needs or 'very high' in exercise_needs: | |
description_parts.append("requiring high daily exercise and mental stimulation") | |
elif 'low' in exercise_needs or 'minimal' in exercise_needs: | |
description_parts.append("with minimal exercise requirements, suitable for apartment living") | |
else: | |
description_parts.append("with moderate exercise needs") | |
# 4. Noise characteristics (critical for quiet requirements) | |
noise_level = noise_info.get('noise_level', 'moderate').lower() | |
if 'low' in noise_level or 'quiet' in noise_level: | |
description_parts.append("known for being quiet and rarely barking") | |
elif 'high' in noise_level or 'loud' in noise_level: | |
description_parts.append("tends to be vocal and bark frequently") | |
else: | |
description_parts.append("with moderate barking tendencies") | |
# 5. Living space compatibility | |
if size in ['small', 'tiny']: | |
description_parts.append("excellent for small apartments and limited spaces") | |
elif size in ['large', 'giant']: | |
description_parts.append("requiring large living spaces and preferably a yard") | |
else: | |
description_parts.append("adaptable to various living situations") | |
# 6. Grooming and maintenance | |
grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower() | |
if 'high' in grooming_needs: | |
description_parts.append("requiring regular professional grooming") | |
elif 'low' in grooming_needs: | |
description_parts.append("with minimal grooming requirements") | |
else: | |
description_parts.append("with moderate grooming needs") | |
# 7. Family compatibility | |
good_with_children = breed_info.get('Good with Children', 'Yes') | |
if good_with_children == 'Yes': | |
description_parts.append("excellent with children and families") | |
else: | |
description_parts.append("better suited for adult households") | |
# 8. Intelligence and trainability (from database description) | |
intelligence_keywords = [] | |
description_text = breed_info.get('Description', '').lower() | |
if description_text: | |
# Extract intelligence indicators from description | |
if any(word in description_text for word in ['intelligent', 'smart', 'clever', 'quick to learn']): | |
intelligence_keywords.extend(['highly intelligent', 'trainable', 'quick learner']) | |
elif any(word in description_text for word in ['stubborn', 'independent', 'difficult to train']): | |
intelligence_keywords.extend(['independent minded', 'requires patience', 'challenging to train']) | |
else: | |
intelligence_keywords.extend(['moderate intelligence', 'trainable with consistency']) | |
# Extract working/purpose traits from description | |
if any(word in description_text for word in ['working', 'herding', 'guard', 'hunting']): | |
intelligence_keywords.extend(['working breed', 'purpose-driven', 'task-oriented']) | |
elif any(word in description_text for word in ['companion', 'lap', 'toy', 'decorative']): | |
intelligence_keywords.extend(['companion breed', 'affectionate', 'people-focused']) | |
# Add intelligence context to description | |
if intelligence_keywords: | |
description_parts.append(f"characterized as {', '.join(intelligence_keywords[:2])}") | |
# 9. Special characteristics and purpose (enhanced with database mining) | |
if breed_info.get('Description'): | |
desc = breed_info.get('Description', '')[:150] # Increased to 150 chars for more context | |
if desc: | |
# Extract key traits from description for better semantic matching | |
desc_lower = desc.lower() | |
key_traits = [] | |
# Extract key behavioral traits from description | |
if 'friendly' in desc_lower: | |
key_traits.append('friendly') | |
if 'gentle' in desc_lower: | |
key_traits.append('gentle') | |
if 'energetic' in desc_lower or 'active' in desc_lower: | |
key_traits.append('energetic') | |
if 'calm' in desc_lower or 'peaceful' in desc_lower: | |
key_traits.append('calm') | |
if 'protective' in desc_lower or 'guard' in desc_lower: | |
key_traits.append('protective') | |
trait_text = f" and {', '.join(key_traits)}" if key_traits else "" | |
description_parts.append(f"Known for: {desc.lower()}{trait_text}") | |
# 9. Care level requirements | |
try: | |
care_level = breed_info.get('Care Level', 'moderate') | |
if isinstance(care_level, str): | |
description_parts.append(f"requiring {care_level.lower()} overall care level") | |
else: | |
description_parts.append("requiring moderate overall care level") | |
except Exception as e: | |
print(f"Error processing care level for {breed}: {str(e)}") | |
description_parts.append("requiring moderate overall care level") | |
# 10. Lifespan information | |
try: | |
lifespan = breed_info.get('Lifespan', '10-12 years') | |
if lifespan and isinstance(lifespan, str) and lifespan.strip(): | |
description_parts.append(f"with a typical lifespan of {lifespan}") | |
else: | |
description_parts.append("with a typical lifespan of 10-12 years") | |
except Exception as e: | |
print(f"Error processing lifespan for {breed}: {str(e)}") | |
description_parts.append("with a typical lifespan of 10-12 years") | |
# Create comprehensive description | |
full_description = '. '.join(description_parts) + '.' | |
# Add comprehensive keywords for better semantic matching | |
keywords = [] | |
# Basic breed name keywords | |
keywords.extend([word.lower() for word in breed_display_name.split()]) | |
# Temperament keywords | |
if temperament: | |
keywords.extend([word.lower().strip(',') for word in temperament.split()]) | |
# Size-based keywords | |
if 'small' in size or 'tiny' in size: | |
keywords.extend(['small', 'tiny', 'compact', 'little', 'apartment', 'indoor', 'lap']) | |
elif 'large' in size or 'giant' in size: | |
keywords.extend(['large', 'big', 'giant', 'huge', 'yard', 'space', 'outdoor']) | |
else: | |
keywords.extend(['medium', 'moderate', 'average', 'balanced']) | |
# Activity level keywords | |
exercise_needs = breed_info.get('Exercise Needs', 'moderate').lower() | |
if 'high' in exercise_needs: | |
keywords.extend(['active', 'energetic', 'exercise', 'outdoor', 'hiking', 'running', 'athletic']) | |
elif 'low' in exercise_needs: | |
keywords.extend(['calm', 'low-energy', 'indoor', 'relaxed', 'couch', 'sedentary']) | |
else: | |
keywords.extend(['moderate', 'balanced', 'walks', 'regular']) | |
# Noise level keywords | |
noise_level = noise_info.get('noise_level', 'moderate').lower() | |
if 'quiet' in noise_level or 'low' in noise_level: | |
keywords.extend(['quiet', 'silent', 'calm', 'peaceful', 'low-noise']) | |
elif 'high' in noise_level or 'loud' in noise_level: | |
keywords.extend(['vocal', 'barking', 'loud', 'alert', 'watchdog']) | |
# Living situation keywords | |
if size in ['small', 'tiny'] and 'low' in exercise_needs: | |
keywords.extend(['apartment', 'city', 'urban', 'small-space']) | |
if size in ['large', 'giant'] or 'high' in exercise_needs: | |
keywords.extend(['house', 'yard', 'suburban', 'rural', 'space']) | |
# Family keywords | |
good_with_children = breed_info.get('Good with Children', 'Yes') | |
if good_with_children == 'Yes': | |
keywords.extend(['family', 'children', 'kids', 'friendly', 'gentle']) | |
# Intelligence and trainability keywords (from database description mining) | |
if intelligence_keywords: | |
keywords.extend([word.lower() for phrase in intelligence_keywords for word in phrase.split()]) | |
# Grooming-based keywords (enhanced) | |
grooming_needs = breed_info.get('Grooming Needs', 'moderate').lower() | |
if 'high' in grooming_needs: | |
keywords.extend(['high-maintenance', 'professional-grooming', 'daily-brushing', 'coat-care']) | |
elif 'low' in grooming_needs: | |
keywords.extend(['low-maintenance', 'minimal-grooming', 'easy-care', 'wash-and-go']) | |
else: | |
keywords.extend(['moderate-grooming', 'weekly-brushing', 'regular-care']) | |
# Lifespan-based keywords | |
lifespan = breed_info.get('Lifespan', '10-12 years') | |
if lifespan and isinstance(lifespan, str): | |
try: | |
# Extract years from lifespan string (e.g., "10-12 years" or "12-15 years") | |
import re | |
years = re.findall(r'\d+', lifespan) | |
if years: | |
avg_years = sum(int(y) for y in years) / len(years) | |
if avg_years >= 14: | |
keywords.extend(['long-lived', 'longevity', 'durable', 'healthy-lifespan']) | |
elif avg_years <= 8: | |
keywords.extend(['shorter-lifespan', 'health-considerations', 'special-care']) | |
else: | |
keywords.extend(['average-lifespan', 'moderate-longevity']) | |
except: | |
keywords.extend(['average-lifespan']) | |
# Add keywords to description for better semantic matching | |
unique_keywords = list(set(keywords)) | |
keyword_text = ' '.join(unique_keywords) | |
full_description += f" Additional context: {keyword_text}" | |
return full_description | |
except Exception as e: | |
print(f"Error creating description for {breed}: {str(e)}") | |
return f"{breed.replace('_', ' ')} is a dog breed with unique characteristics." | |
def _build_breed_vectors(self): | |
"""Build vector representations for all breeds - called lazily when needed""" | |
try: | |
print("Building breed vector database...") | |
# Initialize model if not already done | |
if self.sbert_model is None: | |
self._initialize_model() | |
# Skip if model is not available | |
if self.sbert_model is None: | |
print("SBERT model not available, skipping vector building") | |
return | |
for breed in self.breed_list: | |
description = self._create_breed_description(breed) | |
# Generate embedding vector | |
embedding = self.sbert_model.encode(description, convert_to_tensor=False) | |
# Get breed characteristics | |
breed_info = get_dog_description(breed) | |
characteristics = { | |
'size': breed_info.get('Size', 'Medium') if breed_info else 'Medium', | |
'exercise_needs': breed_info.get('Exercise Needs', 'Moderate') if breed_info else 'Moderate', | |
'grooming_needs': breed_info.get('Grooming Needs', 'Moderate') if breed_info else 'Moderate', | |
'good_with_children': breed_info.get('Good with Children', 'Yes') if breed_info else 'Yes', | |
'temperament': breed_info.get('Temperament', '') if breed_info else '' | |
} | |
self.breed_vectors[breed] = BreedDescriptionVector( | |
breed_name=breed, | |
description_text=description, | |
embedding=embedding, | |
characteristics=characteristics | |
) | |
print(f"Successfully built {len(self.breed_vectors)} breed vectors") | |
except Exception as e: | |
print(f"Error building breed vectors: {str(e)}") | |
print(traceback.format_exc()) | |
raise | |
def _parse_comparative_preferences(self, user_input: str) -> Dict[str, float]: | |
"""Parse comparative preference expressions""" | |
breed_scores = {} | |
# Normalize input | |
text = user_input.lower() | |
# Find breed names and preference keywords | |
for breed in self.breed_list: | |
breed_display = breed.replace('_', ' ').lower() | |
breed_words = breed_display.split() | |
# Check if this breed is mentioned | |
breed_mentioned = False | |
for word in breed_words: | |
if word in text: | |
breed_mentioned = True | |
break | |
if breed_mentioned: | |
# Find nearby preference keywords | |
breed_score = 0.5 # Default score | |
# Look for keywords within 50 characters of breed name | |
breed_pos = text.find(breed_words[0]) | |
if breed_pos != -1: | |
# Check for keywords in context | |
context_start = max(0, breed_pos - 50) | |
context_end = min(len(text), breed_pos + 50) | |
context = text[context_start:context_end] | |
for keyword, score in self.comparative_keywords.items(): | |
if keyword in context: | |
breed_score = max(breed_score, score) | |
breed_scores[breed] = breed_score | |
return breed_scores | |
def _extract_lifestyle_keywords(self, user_input: str) -> Dict[str, List[str]]: | |
"""Enhanced lifestyle keyword extraction with better pattern matching""" | |
keywords = { | |
'living_space': [], | |
'activity_level': [], | |
'family_situation': [], | |
'noise_preference': [], | |
'size_preference': [], | |
'care_level': [], | |
'special_needs': [], | |
'intelligence_preference': [], | |
'grooming_preference': [], | |
'lifespan_preference': [], | |
'temperament_preference': [], | |
'experience_level': [] | |
} | |
text = user_input.lower() | |
# Enhanced living space detection | |
apartment_terms = ['apartment', 'flat', 'condo', 'small space', 'city living', 'urban', 'no yard', 'indoor'] | |
house_terms = ['house', 'yard', 'garden', 'backyard', 'large space', 'suburban', 'rural', 'farm'] | |
if any(term in text for term in apartment_terms): | |
keywords['living_space'].append('apartment') | |
if any(term in text for term in house_terms): | |
keywords['living_space'].append('house') | |
# Enhanced activity level detection | |
high_activity = ['active', 'energetic', 'exercise', 'hiking', 'running', 'outdoor', 'sports', 'jogging', | |
'athletic', 'adventure', 'vigorous', 'high energy', 'workout'] | |
low_activity = ['calm', 'lazy', 'indoor', 'low energy', 'couch', 'sedentary', 'relaxed', | |
'peaceful', 'quiet lifestyle', 'minimal exercise'] | |
moderate_activity = ['moderate', 'walk', 'daily walks', 'light exercise'] | |
if any(term in text for term in high_activity): | |
keywords['activity_level'].append('high') | |
if any(term in text for term in low_activity): | |
keywords['activity_level'].append('low') | |
if any(term in text for term in moderate_activity): | |
keywords['activity_level'].append('moderate') | |
# Enhanced family situation detection | |
children_terms = ['children', 'kids', 'family', 'child', 'toddler', 'baby', 'teenage', 'school age'] | |
elderly_terms = ['elderly', 'senior', 'old', 'retirement', 'aged', 'mature'] | |
single_terms = ['single', 'alone', 'individual', 'solo', 'myself'] | |
if any(term in text for term in children_terms): | |
keywords['family_situation'].append('children') | |
if any(term in text for term in elderly_terms): | |
keywords['family_situation'].append('elderly') | |
if any(term in text for term in single_terms): | |
keywords['family_situation'].append('single') | |
# Enhanced noise preference detection | |
quiet_terms = ['quiet', 'silent', 'noise-sensitive', 'peaceful', 'no barking', 'minimal noise', | |
'soft-spoken', 'calm', 'tranquil'] | |
noise_ok_terms = ['loud', 'barking ok', 'noise tolerant', 'vocal', 'doesn\'t matter'] | |
if any(term in text for term in quiet_terms): | |
keywords['noise_preference'].append('low') | |
if any(term in text for term in noise_ok_terms): | |
keywords['noise_preference'].append('high') | |
# Enhanced size preference detection | |
small_terms = ['small', 'tiny', 'little', 'compact', 'miniature', 'toy', 'lap dog'] | |
large_terms = ['large', 'big', 'giant', 'huge', 'massive', 'great'] | |
medium_terms = ['medium', 'moderate size', 'average', 'mid-sized'] | |
if any(term in text for term in small_terms): | |
keywords['size_preference'].append('small') | |
if any(term in text for term in large_terms): | |
keywords['size_preference'].append('large') | |
if any(term in text for term in medium_terms): | |
keywords['size_preference'].append('medium') | |
# Enhanced care level detection | |
low_care = ['low maintenance', 'easy care', 'simple', 'minimal grooming', 'wash and go'] | |
high_care = ['high maintenance', 'grooming', 'care intensive', 'professional grooming', 'daily brushing'] | |
if any(term in text for term in low_care): | |
keywords['care_level'].append('low') | |
if any(term in text for term in high_care): | |
keywords['care_level'].append('high') | |
# Intelligence preference detection (NEW) | |
smart_terms = ['smart', 'intelligent', 'clever', 'bright', 'quick learner', 'easy to train', 'trainable', 'genius', 'brilliant'] | |
independent_terms = ['independent', 'stubborn', 'strong-willed', 'less trainable', 'thinks for themselves'] | |
if any(term in text for term in smart_terms): | |
keywords['intelligence_preference'].append('high') | |
if any(term in text for term in independent_terms): | |
keywords['intelligence_preference'].append('independent') | |
# Grooming preference detection (NEW) | |
low_grooming_terms = ['low grooming', 'minimal grooming', 'easy care', 'wash and wear', 'no grooming', 'simple coat'] | |
high_grooming_terms = ['high grooming', 'professional grooming', 'lots of care', 'high maintenance coat', 'daily brushing', 'regular grooming'] | |
if any(term in text for term in low_grooming_terms): | |
keywords['grooming_preference'].append('low') | |
if any(term in text for term in high_grooming_terms): | |
keywords['grooming_preference'].append('high') | |
# Lifespan preference detection (NEW) | |
long_lived_terms = ['long lived', 'long lifespan', 'live long', 'many years', '15+ years', 'longevity'] | |
healthy_terms = ['healthy breed', 'few health issues', 'robust', 'hardy', 'strong constitution'] | |
if any(term in text for term in long_lived_terms): | |
keywords['lifespan_preference'].append('long') | |
if any(term in text for term in healthy_terms): | |
keywords['lifespan_preference'].append('healthy') | |
# Temperament preference detection (NEW) | |
gentle_terms = ['gentle', 'calm', 'peaceful', 'laid back', 'chill', 'mellow', 'docile'] | |
playful_terms = ['playful', 'energetic', 'fun', 'active personality', 'lively', 'spirited', 'bouncy'] | |
protective_terms = ['protective', 'guard', 'watchdog', 'alert', 'vigilant', 'defensive'] | |
friendly_terms = ['friendly', 'social', 'outgoing', 'loves people', 'sociable', 'gregarious'] | |
if any(term in text for term in gentle_terms): | |
keywords['temperament_preference'].append('gentle') | |
if any(term in text for term in playful_terms): | |
keywords['temperament_preference'].append('playful') | |
if any(term in text for term in protective_terms): | |
keywords['temperament_preference'].append('protective') | |
if any(term in text for term in friendly_terms): | |
keywords['temperament_preference'].append('friendly') | |
# Experience level detection (NEW) | |
beginner_terms = ['first time', 'beginner', 'new to dogs', 'never had', 'novice', 'inexperienced'] | |
advanced_terms = ['experienced', 'advanced', 'dog expert', 'many dogs before', 'professional', 'seasoned'] | |
if any(term in text for term in beginner_terms): | |
keywords['experience_level'].append('beginner') | |
if any(term in text for term in advanced_terms): | |
keywords['experience_level'].append('advanced') | |
# Enhanced special needs detection | |
guard_terms = ['guard', 'protection', 'security', 'watchdog', 'protective', 'defender'] | |
companion_terms = ['therapy', 'emotional support', 'companion', 'comfort', 'lap dog', 'cuddly'] | |
hypoallergenic_terms = ['hypoallergenic', 'allergies', 'non-shedding', 'allergy-friendly', 'no shed'] | |
multi_pet_terms = ['good with cats', 'cat friendly', 'multi-pet', 'other animals'] | |
if any(term in text for term in guard_terms): | |
keywords['special_needs'].append('guard') | |
if any(term in text for term in companion_terms): | |
keywords['special_needs'].append('companion') | |
if any(term in text for term in hypoallergenic_terms): | |
keywords['special_needs'].append('hypoallergenic') | |
if any(term in text for term in multi_pet_terms): | |
keywords['special_needs'].append('multi_pet') | |
return keywords | |
def _apply_size_distribution_correction(self, recommendations: List[Dict]) -> List[Dict]: | |
"""Apply size distribution correction to prevent large breed bias""" | |
if len(recommendations) < 10: | |
return recommendations | |
# Analyze size distribution | |
size_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0} | |
for rec in recommendations: | |
breed_info = get_dog_description(rec['breed']) | |
if breed_info: | |
size = self._normalize_breed_size(breed_info.get('Size', 'Medium')) | |
size_counts[size] += 1 | |
total_recs = len(recommendations) | |
large_giant_ratio = (size_counts['large'] + size_counts['giant']) / total_recs | |
# If more than 70% are large/giant breeds, apply correction | |
if large_giant_ratio > 0.7: | |
corrected_recommendations = [] | |
size_quotas = {'toy': 2, 'small': 4, 'medium': 6, 'large': 2, 'giant': 1} | |
current_counts = {'toy': 0, 'small': 0, 'medium': 0, 'large': 0, 'giant': 0} | |
# First pass: add breeds within quotas | |
for rec in recommendations: | |
breed_info = get_dog_description(rec['breed']) | |
if breed_info: | |
size = self._normalize_breed_size(breed_info.get('Size', 'Medium')) | |
if current_counts[size] < size_quotas[size]: | |
corrected_recommendations.append(rec) | |
current_counts[size] += 1 | |
# Second pass: fill remaining slots with best remaining candidates | |
remaining_slots = 15 - len(corrected_recommendations) | |
remaining_breeds = [rec for rec in recommendations if rec not in corrected_recommendations] | |
corrected_recommendations.extend(remaining_breeds[:remaining_slots]) | |
return corrected_recommendations | |
return recommendations | |
def _normalize_breed_size(self, size: str) -> str: | |
"""Normalize breed size to standard categories""" | |
if not isinstance(size, str): | |
return 'medium' | |
size_lower = size.lower() | |
if any(term in size_lower for term in ['toy', 'tiny']): | |
return 'toy' | |
elif 'small' in size_lower: | |
return 'small' | |
elif 'medium' in size_lower: | |
return 'medium' | |
elif 'large' in size_lower: | |
return 'large' | |
elif any(term in size_lower for term in ['giant', 'extra large']): | |
return 'giant' | |
else: | |
return 'medium' | |
def _parse_user_requirements(self, user_input: str) -> Dict[str, Any]: | |
"""Parse user requirements more accurately""" | |
requirements = { | |
'living_space': None, | |
'exercise_level': None, | |
'preferred_size': None, | |
'noise_tolerance': None | |
} | |
input_lower = user_input.lower() | |
# Living space detection | |
if 'apartment' in input_lower or 'small' in input_lower: | |
requirements['living_space'] = 'apartment' | |
elif 'large house' in input_lower or 'big' in input_lower: | |
requirements['living_space'] = 'large_house' | |
elif 'medium' in input_lower: | |
requirements['living_space'] = 'medium_house' | |
# Exercise level detection | |
if "don't exercise" in input_lower or 'low exercise' in input_lower: | |
requirements['exercise_level'] = 'low' | |
elif any(term in input_lower for term in ['hiking', 'running', 'active']): | |
requirements['exercise_level'] = 'high' | |
elif '30 minutes' in input_lower or 'moderate' in input_lower: | |
requirements['exercise_level'] = 'moderate' | |
# Size preference detection | |
if any(term in input_lower for term in ['small dog', 'tiny', 'toy']): | |
requirements['preferred_size'] = 'small' | |
elif any(term in input_lower for term in ['large dog', 'big dog']): | |
requirements['preferred_size'] = 'large' | |
elif 'medium' in input_lower: | |
requirements['preferred_size'] = 'medium' | |
return requirements | |
def _apply_hard_constraints(self, breed: str, user_input: str, breed_characteristics: Dict[str, Any]) -> float:
    """Penalize or eliminate a breed that conflicts with the user's text.

    Rules are checked in order: apartment living, low exercise, moderate
    lifestyle, children. An eliminating rule returns immediately; other
    rules accumulate into a running penalty.

    Args:
        breed: Breed identifier passed to ``get_dog_description``.
        user_input: Free-text description from the user.
        breed_characteristics: Accepted for interface compatibility; this
            method does not read it.

    Returns:
        ``0.0`` when the breed has no database entry or no rule applies,
        ``-2.0`` when a rule eliminates the breed, otherwise the negative
        sum of the penalties that applied.
    """
    eliminated = -2.0
    request = user_input.lower()

    info = get_dog_description(breed)
    if not info:
        return 0.0

    size_text = info.get('Size', '').lower()
    exercise_text = info.get('Exercise Needs', '').lower()

    def user_mentions(*phrases):
        # True when the user's text contains any of the phrases.
        return any(phrase in request for phrase in phrases)

    def breed_needs(*levels):
        # True when the breed's exercise field contains any of the levels.
        return any(level in exercise_text for level in levels)

    total = 0.0

    # Apartment living: giants and high-energy large breeds are ruled out.
    if user_mentions('apartment', 'flat', 'studio', 'small space'):
        if 'giant' in size_text:
            return eliminated
        if 'large' in size_text:
            if breed_needs('high', 'very high'):
                return eliminated
            total -= 0.5
        elif 'medium' in size_text and 'very high' in exercise_text:
            total -= 0.6

    # Low-exercise owner versus demanding breed.
    if user_mentions("don't exercise much", "low exercise"):
        if breed_needs('very high', 'extreme', 'intense'):
            return eliminated
        if 'high' in exercise_text:
            total -= 0.8

    # Moderate lifestyle: extremes are penalized, not eliminated.
    if user_mentions('moderate', 'balanced', '30 minutes', 'half hour'):
        if 'giant' in size_text:
            total -= 0.7
        elif 'very high' in exercise_text:
            total -= 0.5

    # Children in the household: breeds marked 'no' are ruled out.
    if user_mentions('child', 'kids', 'family', 'baby'):
        if info.get('Good with Children', '').lower() == 'no':
            return eliminated

    return total
def get_enhanced_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Run the staged recommendation pipeline for a free-text description.

    Stages, in order: query understanding, constraint filtering, multi-head
    scoring, score calibration, record assembly, size distribution
    correction, and intelligent trait matching.

    Args:
        user_input: User's natural language description
        top_k: Number of recommendations to return

    Returns:
        List of recommendation dicts. The result of
        ``get_semantic_recommendations`` is returned instead when no
        multi-head scorer is set or when any stage raises.
    """
    try:
        # Stage 1: query understanding
        dims = self.query_engine.analyze_query(user_input)
        detected = (
            dims.spatial_constraints
            + dims.activity_level
            + dims.noise_preferences
            + dims.size_preferences
            + dims.family_context
            + dims.maintenance_level
            + dims.special_requirements
        )
        print(f"Query dimensions detected: {len(detected)} total dimensions")

        # Stage 2: constraint filtering
        filtered = self.constraint_manager.apply_constraints(dims, min_candidates=max(8, top_k))
        print(f"Constraint filtering: {len(self.breed_list)} -> {len(filtered.passed_breeds)} candidates")
        if not filtered.passed_breeds:
            error_msg = f"No dog breeds match your requirements after applying constraints. Applied constraints: {filtered.applied_constraints}. Consider relaxing some requirements."
            print(f"ERROR: {error_msg}")
            # Caught by the handler below, which switches to the basic method.
            raise ValueError(error_msg)

        # Stage 3: multi-head scoring (or bail out to the basic method)
        if not self.multi_head_scorer:
            print("Multi-head scorer not available, using fallback scoring")
            return self.get_semantic_recommendations(user_input, top_k)
        scored = self.multi_head_scorer.score_breeds(filtered.passed_breeds, dims)
        print(f"Multi-head scoring completed for {len(scored)} breeds")

        # Stage 4: score calibration
        calibration = self.score_calibrator.calibrate_scores(
            [(item.breed_name, item.final_score) for item in scored]
        )
        print(f"Score calibration: method={calibration.calibration_method}")

        # Stage 5: assemble the recommendation records
        assembled = []
        for rank, item in enumerate(scored[:top_k], start=1):
            name = item.breed_name
            # Use the raw score when the calibrator has no entry for this breed.
            score = calibration.score_mapping.get(name, item.final_score)

            lookup_key = name.replace(' ', '_')
            standardized = get_standardized_breed_data(lookup_key)
            if standardized:
                info = self._get_breed_info_from_standardized(standardized)
            else:
                info = get_dog_description(lookup_key) or {}

            assembled.append({
                'breed': name,
                'rank': rank,
                'overall_score': score,
                'final_score': score,
                'semantic_score': item.semantic_component,
                'attribute_score': item.attribute_component,
                'bidirectional_bonus': item.bidirectional_bonus,
                'confidence_score': item.confidence_score,
                'dimensional_breakdown': item.dimensional_breakdown,
                'explanation': item.explanation,
                'size': info.get('Size', 'Unknown'),
                'temperament': info.get('Temperament', ''),
                'exercise_needs': info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': info.get('Grooming Needs', 'Moderate'),
                'good_with_children': info.get('Good with Children', 'Yes'),
                'lifespan': info.get('Lifespan', '10-12 years'),
                'description': info.get('Description', ''),
                'search_type': 'enhanced_description',
                'calibration_method': calibration.calibration_method,
                'applied_constraints': filtered.applied_constraints,
                'relaxed_constraints': filtered.relaxed_constraints,
                'warnings': filtered.warnings,
            })

        # Size distribution correction, then Stage 6: trait matching
        corrected = self._apply_size_distribution_correction(assembled)
        enhanced = self._apply_intelligent_trait_matching(corrected, user_input)
        print(f"Generated {len(enhanced)} enhanced semantic recommendations with intelligent trait matching")
        return enhanced
    except Exception as e:
        print(f"Error in enhanced semantic recommendations: {str(e)}")
        print(traceback.format_exc())
        # Any failure above degrades to the basic semantic method.
        return self.get_semantic_recommendations(user_input, top_k)
def _apply_intelligent_trait_matching(self, recommendations: List[Dict], user_input: str) -> List[Dict]: | |
"""Apply intelligent trait matching based on enhanced keyword extraction and database mining""" | |
try: | |
# Extract enhanced keywords from user input | |
extracted_keywords = self._extract_lifestyle_keywords(user_input) | |
# Apply intelligent trait matching to each recommendation | |
enhanced_recommendations = [] | |
for rec in recommendations: | |
breed_name = rec['breed'].replace(' ', '_') | |
# Get breed database information | |
breed_info = get_dog_description(breed_name) or {} | |
# Calculate intelligent trait bonuses | |
intelligence_bonus = 0.0 | |
trait_match_details = {} | |
# 1. Intelligence Matching | |
if extracted_keywords.get('intelligence_preference'): | |
intelligence_pref = extracted_keywords['intelligence_preference'][0] | |
breed_desc = breed_info.get('Description', '').lower() | |
if intelligence_pref == 'high': | |
if any(word in breed_desc for word in ['intelligent', 'smart', 'clever', 'quick to learn', 'trainable']): | |
intelligence_bonus += 0.05 | |
trait_match_details['intelligence_match'] = 'High intelligence match detected' | |
elif any(word in breed_desc for word in ['stubborn', 'independent', 'difficult']): | |
intelligence_bonus -= 0.02 | |
trait_match_details['intelligence_warning'] = 'May be challenging to train' | |
elif intelligence_pref == 'independent': | |
if any(word in breed_desc for word in ['independent', 'stubborn', 'strong-willed']): | |
intelligence_bonus += 0.03 | |
trait_match_details['independence_match'] = 'Independent nature match' | |
# 2. Grooming Preference Matching | |
if extracted_keywords.get('grooming_preference'): | |
grooming_pref = extracted_keywords['grooming_preference'][0] | |
breed_grooming = breed_info.get('Grooming Needs', '').lower() | |
if grooming_pref == 'low' and 'low' in breed_grooming: | |
intelligence_bonus += 0.03 | |
trait_match_details['grooming_match'] = 'Low maintenance grooming match' | |
elif grooming_pref == 'high' and 'high' in breed_grooming: | |
intelligence_bonus += 0.03 | |
trait_match_details['grooming_match'] = 'High maintenance grooming match' | |
elif grooming_pref == 'low' and 'high' in breed_grooming: | |
intelligence_bonus -= 0.04 | |
trait_match_details['grooming_mismatch'] = 'High grooming needs may not suit preferences' | |
# 3. Temperament Preference Matching | |
if extracted_keywords.get('temperament_preference'): | |
temp_prefs = extracted_keywords['temperament_preference'] | |
breed_temperament = breed_info.get('Temperament', '').lower() | |
breed_desc = breed_info.get('Description', '').lower() | |
temp_text = (breed_temperament + ' ' + breed_desc).lower() | |
for temp_pref in temp_prefs: | |
if temp_pref == 'gentle' and any(word in temp_text for word in ['gentle', 'calm', 'peaceful', 'mild']): | |
intelligence_bonus += 0.04 | |
trait_match_details['temperament_match'] = f'Gentle temperament match: {temp_pref}' | |
elif temp_pref == 'playful' and any(word in temp_text for word in ['playful', 'energetic', 'lively', 'fun']): | |
intelligence_bonus += 0.04 | |
trait_match_details['temperament_match'] = f'Playful temperament match: {temp_pref}' | |
elif temp_pref == 'protective' and any(word in temp_text for word in ['protective', 'guard', 'alert', 'watchful']): | |
intelligence_bonus += 0.04 | |
trait_match_details['temperament_match'] = f'Protective temperament match: {temp_pref}' | |
elif temp_pref == 'friendly' and any(word in temp_text for word in ['friendly', 'social', 'outgoing', 'people']): | |
intelligence_bonus += 0.04 | |
trait_match_details['temperament_match'] = f'Friendly temperament match: {temp_pref}' | |
# 4. Experience Level Matching | |
if extracted_keywords.get('experience_level'): | |
exp_level = extracted_keywords['experience_level'][0] | |
breed_desc = breed_info.get('Description', '').lower() | |
if exp_level == 'beginner': | |
# Favor easy-to-handle breeds for beginners | |
if any(word in breed_desc for word in ['easy', 'gentle', 'good for beginners', 'family', 'calm']): | |
intelligence_bonus += 0.06 | |
trait_match_details['beginner_friendly'] = 'Good choice for first-time owners' | |
elif any(word in breed_desc for word in ['challenging', 'dominant', 'requires experience', 'strong-willed']): | |
intelligence_bonus -= 0.08 | |
trait_match_details['experience_warning'] = 'May be challenging for first-time owners' | |
elif exp_level == 'advanced': | |
# Advanced users can handle more challenging breeds | |
if any(word in breed_desc for word in ['working', 'requires experience', 'intelligent', 'strong']): | |
intelligence_bonus += 0.03 | |
trait_match_details['advanced_suitable'] = 'Good match for experienced owners' | |
# 5. Lifespan Preference Matching | |
if extracted_keywords.get('lifespan_preference'): | |
lifespan_pref = extracted_keywords['lifespan_preference'][0] | |
breed_lifespan = breed_info.get('Lifespan', '10-12 years') | |
try: | |
import re | |
years = re.findall(r'\d+', breed_lifespan) | |
if years: | |
avg_years = sum(int(y) for y in years) / len(years) | |
if lifespan_pref == 'long' and avg_years >= 13: | |
intelligence_bonus += 0.02 | |
trait_match_details['longevity_match'] = f'Long lifespan match: {breed_lifespan}' | |
elif lifespan_pref == 'healthy' and avg_years >= 12: | |
intelligence_bonus += 0.02 | |
trait_match_details['health_match'] = f'Healthy lifespan: {breed_lifespan}' | |
except: | |
pass | |
# Apply the intelligence bonus to the overall score | |
original_score = rec['overall_score'] | |
enhanced_score = min(1.0, original_score + intelligence_bonus) | |
# Create enhanced recommendation with trait matching details | |
enhanced_rec = rec.copy() | |
enhanced_rec['overall_score'] = enhanced_score | |
enhanced_rec['intelligence_bonus'] = intelligence_bonus | |
enhanced_rec['trait_match_details'] = trait_match_details | |
# Add detailed explanation if significant enhancement occurred | |
if abs(intelligence_bonus) > 0.02: | |
enhancement_explanation = [] | |
for detail_key, detail_value in trait_match_details.items(): | |
enhancement_explanation.append(detail_value) | |
if enhancement_explanation: | |
current_explanation = enhanced_rec.get('explanation', '') | |
enhanced_explanation = current_explanation + f" Enhanced matching: {'; '.join(enhancement_explanation)}" | |
enhanced_rec['explanation'] = enhanced_explanation | |
enhanced_recommendations.append(enhanced_rec) | |
# Re-sort by enhanced overall score | |
enhanced_recommendations.sort(key=lambda x: x['overall_score'], reverse=True) | |
# Update ranks | |
for i, rec in enumerate(enhanced_recommendations): | |
rec['rank'] = i + 1 | |
print(f"Applied intelligent trait matching with average bonus: {sum(r['intelligence_bonus'] for r in enhanced_recommendations) / len(enhanced_recommendations):.3f}") | |
return enhanced_recommendations | |
except Exception as e: | |
print(f"Error in intelligent trait matching: {str(e)}") | |
# Return original recommendations if trait matching fails | |
return recommendations | |
def get_semantic_recommendations(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Get breed recommendations based on natural language description

    Every breed that survives the hard constraints receives a display score
    built from a normalized semantic similarity, keyword feature matching,
    the lifestyle bonus and the comparative bonus. That score is mapped onto
    display bands and clamped to the range 0.45-0.98.

    Args:
        user_input: User's natural language description
        top_k: Number of recommendations to return

    Returns:
        List of recommended breeds, best first. An empty list is returned when
        anything fails, including the SBERT model being unavailable.
    """
    try:
        print(f"Processing user input: {user_input}")

        # Try to load the SBERT model if it has not been loaded yet
        if self.sbert_model is None:
            self._initialize_model()

        # Check if model is available - if not, raise error
        # (caught by the handler at the bottom, which returns [])
        if self.sbert_model is None:
            error_msg = "SBERT model not available. This could be due to:\n• Model download failed\n• Insufficient memory\n• Network connectivity issues\n\nPlease check your environment and try again."
            print(f"ERROR: {error_msg}")
            raise RuntimeError(error_msg)

        # Make sure the breed vectors have been built
        if not self.breed_vectors:
            self._build_breed_vectors()

        # Generate user input embedding
        user_embedding = self.sbert_model.encode(user_input, convert_to_tensor=False)
        # Parse comparative preferences
        comparative_prefs = self._parse_comparative_preferences(user_input)
        # Extract lifestyle keywords
        lifestyle_keywords = self._extract_lifestyle_keywords(user_input)

        # Pass 1: drop disqualified breeds and collect the raw score components.
        # The former first-pass combined score was never read afterwards (and
        # raised negative similarities to a fractional power), so it is not
        # computed any more.
        similarities = []
        for breed, breed_vector in self.breed_vectors.items():
            # Apply hard constraints first
            constraint_penalty = self._apply_hard_constraints(breed, user_input, breed_vector.characteristics)
            # Skip breeds that violate critical constraints
            if constraint_penalty <= -1.0:
                continue

            # Basic semantic similarity
            semantic_score = cosine_similarity(
                [user_embedding],
                [breed_vector.embedding]
            )[0][0]

            # Lifestyle matching bonus, with the soft constraint penalty folded in
            lifestyle_bonus = self._calculate_lifestyle_bonus(
                breed_vector.characteristics,
                lifestyle_keywords
            )
            lifestyle_bonus += constraint_penalty

            similarities.append({
                'breed': breed,
                'semantic_score': semantic_score,
                'comparative_bonus': comparative_prefs.get(breed, 0.0),
                'lifestyle_bonus': lifestyle_bonus
            })

        # Statistics for normalizing the semantic scores. Guarded so that an
        # empty candidate list does not trigger NumPy's empty-slice warning.
        all_semantic_scores = [breed_data['semantic_score'] for breed_data in similarities]
        if all_semantic_scores:
            semantic_mean = np.mean(all_semantic_scores)
            semantic_std = np.std(all_semantic_scores) if len(all_semantic_scores) > 1 else 1.0
        else:
            semantic_mean, semantic_std = 0.0, 1.0

        # Keyword checks depend only on the user text, so evaluate them once.
        user_text = user_input.lower()
        wants_small_space = any(term in user_text for term in ['apartment', 'small', 'limited space'])
        wants_low_exercise = any(term in user_text for term in ['low exercise', 'minimal exercise', "doesn't need", 'not much'])
        wants_high_exercise = any(term in user_text for term in ['active', 'high exercise', 'running', 'hiking'])
        mentions_family = any(term in user_text for term in ['children', 'kids', 'family'])
        jitter_context = user_input[:15]

        # Pass 2: standardized display scores with balanced distribution
        breed_display_scores = []
        for breed_data in similarities:
            breed = breed_data['breed']
            base_semantic = breed_data['semantic_score']

            # Normalize semantic score to prevent extreme outliers
            if semantic_std > 0:
                normalized_semantic = (base_semantic - semantic_mean) / semantic_std
                normalized_semantic = max(-2.0, min(2.0, normalized_semantic))  # Cap at 2 standard deviations
                scaled_semantic = 0.5 + (normalized_semantic * 0.1)  # Map to 0.3-0.7 range
            else:
                scaled_semantic = 0.5

            # Get breed characteristics
            breed_info = get_dog_description(breed) if breed != 'Unknown' else {}
            breed_size = breed_info.get('Size', '').lower() if breed_info else ''
            exercise_needs = breed_info.get('Exercise Needs', '').lower() if breed_info else ''

            # Feature matching score (weighted above pure semantic similarity)
            feature_score = 0.0

            # Size and space requirements (high weight)
            if wants_small_space:
                if 'small' in breed_size:
                    feature_score += 0.25
                elif 'medium' in breed_size:
                    feature_score += 0.05
                elif 'large' in breed_size or 'giant' in breed_size:
                    feature_score -= 0.30

            # Exercise requirements (high weight); low-exercise wording wins
            # when both kinds of wording appear
            if wants_low_exercise:
                if 'low' in exercise_needs or 'minimal' in exercise_needs:
                    feature_score += 0.20
                elif 'high' in exercise_needs or 'very high' in exercise_needs:
                    feature_score -= 0.25
            elif wants_high_exercise:
                if 'high' in exercise_needs:
                    feature_score += 0.20
                elif 'low' in exercise_needs:
                    feature_score -= 0.15

            # Family compatibility
            if mentions_family:
                good_with_children = breed_info.get('Good with Children', '') if breed_info else ''
                if good_with_children == 'Yes':
                    feature_score += 0.10
                elif good_with_children == 'No':
                    feature_score -= 0.20

            # Combine scores with balanced weights
            base_compatibility = (
                scaled_semantic * 0.35 +  # Reduced semantic weight
                feature_score * 0.45 +  # Increased feature matching weight
                breed_data['lifestyle_bonus'] * 0.15 +
                breed_data['comparative_bonus'] * 0.05
            )

            # Map the compatibility onto a display band; position is the
            # relative location inside the band.
            if base_compatibility >= 0.9:  # Exceptional matches
                score_range = (0.92, 0.98)
                position = (base_compatibility - 0.9) / 0.1
            elif base_compatibility >= 0.75:  # Excellent matches
                score_range = (0.85, 0.91)
                position = (base_compatibility - 0.75) / 0.15
            elif base_compatibility >= 0.6:  # Good matches
                score_range = (0.75, 0.84)
                position = (base_compatibility - 0.6) / 0.15
            elif base_compatibility >= 0.45:  # Fair matches
                score_range = (0.65, 0.74)
                position = (base_compatibility - 0.45) / 0.15
            elif base_compatibility >= 0.3:  # Poor matches
                score_range = (0.55, 0.64)
                position = (base_compatibility - 0.3) / 0.15
            else:  # Very poor matches
                score_range = (0.45, 0.54)
                position = max(0, base_compatibility / 0.3)

            score_span = score_range[1] - score_range[0]
            base_score = score_range[0] + (position * score_span)

            # Small tie-breaking variation. The seed comes from a SHA-256 digest
            # because str hash() is salted per process, which made scores differ
            # between runs; a private Random instance leaves the module-level
            # generator untouched.
            seed_digest = hashlib.sha256((breed + jitter_context).encode('utf-8')).digest()
            jitter_rng = random.Random(int.from_bytes(seed_digest[:8], 'big'))
            variation = jitter_rng.uniform(-0.015, 0.015)
            display_score = round(max(0.45, min(0.98, base_score + variation)), 3)

            breed_display_scores.append({
                'breed': breed,
                'display_score': display_score,
                'semantic_score': base_semantic,
                'comparative_bonus': breed_data['comparative_bonus'],
                'lifestyle_bonus': breed_data['lifestyle_bonus']
            })

        # Sort by display score to ensure ranking consistency
        breed_display_scores.sort(key=lambda x: x['display_score'], reverse=True)
        top_breeds = breed_display_scores[:top_k]

        # Convert to standard recommendation format
        recommendations = []
        for i, breed_data in enumerate(top_breeds):
            breed = breed_data['breed']
            display_score = breed_data['display_score']
            # Get detailed information
            breed_info = get_dog_description(breed)
            recommendation = {
                'breed': breed.replace('_', ' '),
                'rank': i + 1,
                'overall_score': display_score,  # Use display score for consistency
                'final_score': display_score,  # Ensure final_score matches overall_score
                'semantic_score': breed_data['semantic_score'],
                'comparative_bonus': breed_data['comparative_bonus'],
                'lifestyle_bonus': breed_data['lifestyle_bonus'],
                'size': breed_info.get('Size', 'Unknown') if breed_info else 'Unknown',
                'temperament': breed_info.get('Temperament', '') if breed_info else '',
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate') if breed_info else 'Moderate',
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate') if breed_info else 'Moderate',
                'good_with_children': breed_info.get('Good with Children', 'Yes') if breed_info else 'Yes',
                'lifespan': breed_info.get('Lifespan', '10-12 years') if breed_info else '10-12 years',
                'description': breed_info.get('Description', '') if breed_info else '',
                'search_type': 'description'
            }
            recommendations.append(recommendation)

        print(f"Generated {len(recommendations)} semantic recommendations")
        return recommendations
    except Exception as e:
        print(f"Failed to generate semantic recommendations: {str(e)}")
        print(traceback.format_exc())
        return []
def _calculate_lifestyle_bonus(self, breed_characteristics: Dict[str, Any], | |
lifestyle_keywords: Dict[str, List[str]]) -> float: | |
"""Enhanced lifestyle matching bonus calculation""" | |
bonus = 0.0 | |
penalties = 0.0 | |
# Enhanced size matching | |
breed_size = breed_characteristics.get('size', '').lower() | |
size_prefs = lifestyle_keywords.get('size_preference', []) | |
for pref in size_prefs: | |
if pref in breed_size: | |
bonus += 0.25 # Strong reward for size match | |
elif (pref == 'small' and 'large' in breed_size) or \ | |
(pref == 'large' and 'small' in breed_size): | |
penalties += 0.15 # Penalty for size mismatch | |
# Enhanced activity level matching | |
breed_exercise = breed_characteristics.get('exercise_needs', '').lower() | |
activity_prefs = lifestyle_keywords.get('activity_level', []) | |
if 'high' in activity_prefs: | |
if 'high' in breed_exercise or 'very high' in breed_exercise: | |
bonus += 0.2 | |
elif 'low' in breed_exercise: | |
penalties += 0.2 | |
elif 'low' in activity_prefs: | |
if 'low' in breed_exercise: | |
bonus += 0.2 | |
elif 'high' in breed_exercise or 'very high' in breed_exercise: | |
penalties += 0.25 | |
elif 'moderate' in activity_prefs: | |
if 'moderate' in breed_exercise: | |
bonus += 0.15 | |
# Enhanced family situation matching | |
good_with_children = breed_characteristics.get('good_with_children', 'Yes') | |
family_prefs = lifestyle_keywords.get('family_situation', []) | |
if 'children' in family_prefs: | |
if good_with_children == 'Yes': | |
bonus += 0.15 | |
else: | |
penalties += 0.3 # Strong penalty for non-child-friendly breeds | |
# Enhanced living space matching | |
living_prefs = lifestyle_keywords.get('living_space', []) | |
if 'apartment' in living_prefs: | |
if 'small' in breed_size: | |
bonus += 0.2 | |
elif 'medium' in breed_size and 'low' in breed_exercise: | |
bonus += 0.1 | |
elif 'large' in breed_size or 'giant' in breed_size: | |
penalties += 0.2 # Penalty for large dogs in apartments | |
# Noise preference matching | |
noise_prefs = lifestyle_keywords.get('noise_preference', []) | |
temperament = breed_characteristics.get('temperament', '').lower() | |
if 'low' in noise_prefs: | |
# Reward quiet breeds | |
if any(term in temperament for term in ['gentle', 'calm', 'quiet']): | |
bonus += 0.1 | |
# Care level matching | |
grooming_needs = breed_characteristics.get('grooming_needs', '').lower() | |
care_prefs = lifestyle_keywords.get('care_level', []) | |
if 'low' in care_prefs and 'low' in grooming_needs: | |
bonus += 0.1 | |
elif 'high' in care_prefs and 'high' in grooming_needs: | |
bonus += 0.1 | |
elif 'low' in care_prefs and 'high' in grooming_needs: | |
penalties += 0.15 | |
# Special needs matching | |
special_needs = lifestyle_keywords.get('special_needs', []) | |
if 'guard' in special_needs: | |
if any(term in temperament for term in ['protective', 'alert', 'watchful']): | |
bonus += 0.1 | |
elif 'companion' in special_needs: | |
if any(term in temperament for term in ['affectionate', 'gentle', 'loyal']): | |
bonus += 0.1 | |
# Calculate final bonus with penalties | |
final_bonus = bonus - penalties | |
return max(-0.3, min(0.5, final_bonus)) # Allow negative bonus but limit range | |
def _get_breed_info_from_standardized(self, standardized_info) -> Dict[str, Any]: | |
"""Convert standardized breed info to dictionary format""" | |
try: | |
size_map = {1: 'Tiny', 2: 'Small', 3: 'Medium', 4: 'Large', 5: 'Giant'} | |
exercise_map = {1: 'Low', 2: 'Moderate', 3: 'High', 4: 'Very High'} | |
care_map = {1: 'Low', 2: 'Moderate', 3: 'High'} | |
return { | |
'Size': size_map.get(standardized_info.size_category, 'Medium'), | |
'Exercise Needs': exercise_map.get(standardized_info.exercise_level, 'Moderate'), | |
'Grooming Needs': care_map.get(standardized_info.care_complexity, 'Moderate'), | |
'Good with Children': 'Yes' if standardized_info.child_compatibility >= 0.8 else | |
'No' if standardized_info.child_compatibility <= 0.2 else 'Unknown', | |
'Temperament': 'Varies by individual', | |
'Lifespan': '10-12 years', | |
'Description': f'A {size_map.get(standardized_info.size_category, "medium")} sized breed' | |
} | |
except Exception as e: | |
print(f"Error converting standardized info: {str(e)}") | |
return {} | |
def _get_fallback_recommendations(self, top_k: int = 15) -> List[Dict[str, Any]]: | |
"""Get fallback recommendations when enhanced system fails""" | |
try: | |
safe_breeds = [ | |
('Labrador Retriever', 0.85), | |
('Golden Retriever', 0.82), | |
('Cavalier King Charles Spaniel', 0.80), | |
('French Bulldog', 0.78), | |
('Boston Terrier', 0.76), | |
('Bichon Frise', 0.74), | |
('Pug', 0.72), | |
('Cocker Spaniel', 0.70) | |
] | |
recommendations = [] | |
for i, (breed, score) in enumerate(safe_breeds[:top_k]): | |
breed_info = get_dog_description(breed.replace(' ', '_')) or {} | |
recommendation = { | |
'breed': breed, | |
'rank': i + 1, | |
'overall_score': score, | |
'final_score': score, | |
'semantic_score': score * 0.8, | |
'comparative_bonus': 0.0, | |
'lifestyle_bonus': 0.0, | |
'size': breed_info.get('Size', 'Unknown'), | |
'temperament': breed_info.get('Temperament', ''), | |
'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'), | |
'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'), | |
'good_with_children': breed_info.get('Good with Children', 'Yes'), | |
'lifespan': breed_info.get('Lifespan', '10-12 years'), | |
'description': breed_info.get('Description', ''), | |
'search_type': 'fallback' | |
} | |
recommendations.append(recommendation) | |
return recommendations | |
except Exception as e: | |
print(f"Error generating fallback recommendations: {str(e)}") | |
return [] | |
def get_enhanced_recommendations_with_unified_scoring(self, user_input: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """
    Simplified enhanced recommendation entry point.

    Delegates to ``get_semantic_recommendations`` (basic semantic matching).

    Args:
        user_input: User's natural language description
        top_k: Number of recommendations to return

    Returns:
        Whatever ``get_semantic_recommendations`` returns.

    Raises:
        RuntimeError: Wraps any exception raised while processing, chained to
            the original error.
    """
    try:
        preview = user_input[:50]
        print(f"Processing enhanced recommendation: {preview}...")
        # Use basic semantic matching
        results = self.get_semantic_recommendations(user_input, top_k)
    except Exception as e:
        error_msg = f"Enhanced recommendation error: {str(e)}. Please check your description."
        print(f"ERROR: {error_msg}")
        print(traceback.format_exc())
        raise RuntimeError(error_msg) from e
    return results
def _analyze_user_description_enhanced(self, user_description: str) -> Dict[str, Any]: | |
"""增強用戶描述分析""" | |
text = user_description.lower() | |
analysis = { | |
'mentioned_breeds': [], | |
'lifestyle_keywords': {}, | |
'preference_strength': {}, | |
'constraint_requirements': [], | |
'user_context': {} | |
} | |
# 提取提及的品種 | |
for breed in self.breed_list: | |
breed_display = breed.replace('_', ' ').lower() | |
if breed_display in text or any(word in text for word in breed_display.split()): | |
analysis['mentioned_breeds'].append(breed) | |
# 簡單偏好強度分析 | |
if any(word in text for word in ['love', 'prefer', 'like', '喜歡', '最愛']): | |
analysis['preference_strength'][breed] = 0.8 | |
else: | |
analysis['preference_strength'][breed] = 0.5 | |
# 提取約束要求 | |
if any(word in text for word in ['quiet', 'silent', 'no barking', '安靜']): | |
analysis['constraint_requirements'].append('low_noise') | |
if any(word in text for word in ['apartment', 'small space', '公寓']): | |
analysis['constraint_requirements'].append('apartment_suitable') | |
if any(word in text for word in ['children', 'kids', 'family', '小孩']): | |
analysis['constraint_requirements'].append('child_friendly') | |
# 提取用戶背景 | |
analysis['user_context'] = { | |
'has_children': any(word in text for word in ['children', 'kids', '小孩']), | |
'living_space': 'apartment' if any(word in text for word in ['apartment', '公寓']) else 'house', | |
'activity_level': 'high' if any(word in text for word in ['active', 'energetic', '活躍']) else 'moderate', | |
'noise_sensitive': any(word in text for word in ['quiet', 'silent', '安靜']), | |
'experience_level': 'beginner' if any(word in text for word in ['first time', 'beginner', '新手']) else 'intermediate' | |
} | |
return analysis | |
def _create_user_preferences_from_analysis_enhanced(self, analysis: Dict[str, Any]) -> UserPreferences:
    """
    Create a user preferences object from the analysis result.

    Args:
        analysis: Analysis dict; only its ``'user_context'`` entry is read.

    Returns:
        ``UserPreferences`` with living space, yard access, exercise time and
        type, noise tolerance and child settings inferred from the context;
        the remaining fields are fixed values.
    """
    ctx = analysis['user_context']

    # Infer living space type and yard access
    in_apartment = ctx.get('living_space') == 'apartment'
    living_space = 'apartment' if in_apartment else 'house_small'
    yard_access = 'no_yard' if in_apartment else 'shared_yard'

    # Infer exercise time and exercise type from the activity level
    activity = ctx.get('activity_level', 'moderate')
    exercise_time = {'high': 120, 'moderate': 60, 'low': 30}.get(activity, 60)
    exercise_type = {
        'high': 'active_training',
        'moderate': 'moderate_activity',
        'low': 'light_walks',
    }.get(activity, 'moderate_activity')

    # Infer noise tolerance; the same value is used for barking acceptance
    noise_tolerance = 'low' if ctx.get('noise_sensitive', False) else 'medium'

    has_children = ctx.get('has_children', False)

    return UserPreferences(
        living_space=living_space,
        yard_access=yard_access,
        exercise_time=exercise_time,
        exercise_type=exercise_type,
        grooming_commitment='medium',
        experience_level=ctx.get('experience_level', 'intermediate'),
        time_availability='moderate',
        has_children=has_children,
        children_age='school_age' if has_children else None,
        noise_tolerance=noise_tolerance,
        space_for_play=not in_apartment,
        other_pets=False,
        climate='moderate',
        health_sensitivity='medium',
        barking_acceptance=noise_tolerance,
        size_preference='no_preference',
    )
def _get_candidate_breeds_enhanced(self, analysis: Dict[str, Any]) -> List[str]: | |
"""獲取候選品種列表""" | |
candidate_breeds = set() | |
# 如果提及特定品種,優先包含 | |
if analysis['mentioned_breeds']: | |
candidate_breeds.update(analysis['mentioned_breeds']) | |
# 根據約束要求過濾品種 | |
if 'apartment_suitable' in analysis['constraint_requirements']: | |
apartment_suitable = [ | |
'French_Bulldog', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier', | |
'Pug', 'Bichon_Frise', 'Cocker_Spaniel', 'Yorkshire_Terrier', 'Shih_Tzu' | |
] | |
candidate_breeds.update(breed for breed in apartment_suitable if breed in self.breed_list) | |
if 'child_friendly' in analysis['constraint_requirements']: | |
child_friendly = [ | |
'Labrador_Retriever', 'Golden_Retriever', 'Beagle', 'Cavalier_King_Charles_Spaniel', | |
'Bichon_Frise', 'Poodle', 'Cocker_Spaniel' | |
] | |
candidate_breeds.update(breed for breed in child_friendly if breed in self.breed_list) | |
# 如果候選品種不足,添加更多通用品種 | |
if len(candidate_breeds) < 20: | |
general_breeds = [ | |
'Labrador_Retriever', 'German_Shepherd', 'Golden_Retriever', 'French_Bulldog', | |
'Bulldog', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier', 'Boston_Terrier', | |
'Border_Collie', 'Siberian_Husky', 'Cavalier_King_Charles_Spaniel', 'Boxer', | |
'Bichon_Frise', 'Cocker_Spaniel', 'Shih_Tzu', 'Pug', 'Chihuahua' | |
] | |
candidate_breeds.update(breed for breed in general_breeds if breed in self.breed_list) | |
return list(candidate_breeds)[:30] # 限制候選數量以提高效率 | |
def _apply_constraint_filtering_enhanced(self, breed: str, analysis: Dict[str, Any]) -> float:
    """Return a score adjustment for a breed under the requested constraints.

    Negative values penalise the breed, positive values reward it. The
    adjustment is 0.0 when no description is available for the breed.

    Args:
        breed: Breed name used to look up description and noise data.
        analysis: Query analysis; 'constraint_requirements' is consulted.

    Returns:
        The summed adjustment across the low-noise, apartment and
        child-friendly checks.
    """
    info = get_dog_description(breed)
    if not info:
        return 0.0

    adjustment = 0.0

    # Low-noise request: loud breeds are penalised heavily, quiet ones
    # get a small boost.
    if 'low_noise' in analysis['constraint_requirements']:
        noise_level = breed_noise_info.get(breed, {}).get('noise_level', 'moderate').lower()
        if 'high' in noise_level:
            adjustment -= 0.3
        elif 'low' in noise_level:
            adjustment += 0.1

    # Apartment suitability: driven by size, plus an extra penalty for
    # high exercise needs.
    if 'apartment_suitable' in analysis['constraint_requirements']:
        size = info.get('Size', '').lower()
        exercise = info.get('Exercise Needs', '').lower()
        if size in ('large', 'giant'):
            adjustment -= 0.2
        elif size in ('small', 'tiny'):
            adjustment += 0.1
        if 'high' in exercise:
            adjustment -= 0.15

    # Child friendliness: an explicit "No" is penalised heavily.
    if 'child_friendly' in analysis['constraint_requirements']:
        verdict = info.get('Good with Children', 'Unknown')
        if verdict == 'Yes':
            adjustment += 0.15
        elif verdict == 'No':
            adjustment -= 0.4

    return adjustment
def _get_breed_characteristics_enhanced(self, breed: str) -> Dict[str, Any]:
    """Collect a breed's display characteristics into a flat dict.

    Args:
        breed: Breed name used to look up description and noise data.

    Returns:
        An empty dict when no description exists for the breed; otherwise
        a dict with size, temperament, exercise/grooming needs, child
        compatibility, lifespan, description and noise level, each with a
        default when the source field is missing.
    """
    info = get_dog_description(breed)
    if not info:
        return {}

    # (output key, source field, default when the field is missing)
    field_map = (
        ('size', 'Size', 'Unknown'),
        ('temperament', 'Temperament', ''),
        ('exercise_needs', 'Exercise Needs', 'Moderate'),
        ('grooming_needs', 'Grooming Needs', 'Moderate'),
        ('good_with_children', 'Good with Children', 'Unknown'),
        ('lifespan', 'Lifespan', '10-12 years'),
        ('description', 'Description', ''),
    )
    traits = {key: info.get(source, default) for key, source, default in field_map}

    # Noise data lives in a separate lookup table.
    traits['noise_level'] = breed_noise_info.get(breed, {}).get('noise_level', 'moderate')
    return traits
def get_hybrid_recommendations(self, user_description: str,
                               user_preferences: Optional[Any] = None,
                               top_k: int = 15) -> List[Dict[str, Any]]:
    """Blend semantic matching with the traditional compatibility score.

    Args:
        user_description: The user's natural-language description.
        user_preferences: Optional structured preferences. When falsy the
            semantic results are returned unchanged (truncated to top_k).
        top_k: Number of recommendations to return.

    Returns:
        Up to ``top_k`` recommendation dicts. In the hybrid path each dict
        gains 'hybrid_score' and 'traditional_score', and its 'rank' and
        'overall_score' are rewritten from the hybrid score. On any
        exception the plain semantic recommendations are returned instead.
    """
    try:
        # Fetch twice as many semantic hits so re-ranking has room to work.
        candidates = self.get_semantic_recommendations(user_description, top_k * 2)
        if not user_preferences:
            return candidates[:top_k]

        for rec in candidates:
            lookup_name = rec['breed'].replace(' ', '_')
            classic = calculate_compatibility_score(user_preferences, lookup_name)
            # Weighting: 40% semantic, 60% traditional.
            rec['hybrid_score'] = rec['overall_score'] * 0.4 + classic * 0.6
            rec['traditional_score'] = classic

        ranked = sorted(candidates, key=lambda rec: rec['hybrid_score'], reverse=True)

        # Re-number the winners and expose the hybrid score as the overall one.
        winners = ranked[:top_k]
        for position, rec in enumerate(winners, start=1):
            rec['rank'] = position
            rec['overall_score'] = rec['hybrid_score']
        return winners
    except Exception as e:
        print(f"Hybrid recommendation failed: {str(e)}")
        print(traceback.format_exc())
        return self.get_semantic_recommendations(user_description, top_k)
def get_breed_recommendations_by_description(user_description: str,
                                             user_preferences: Optional[Any] = None,
                                             top_k: int = 15) -> List[Dict[str, Any]]:
    """Main interface function for getting breed recommendations by description.

    Tries, in order: the unified-scoring enhanced system, the original
    enhanced semantic system, and finally the standard system (hybrid when
    ``user_preferences`` is given, purely semantic otherwise). The first
    non-empty result wins.

    Args:
        user_description: The user's natural-language description.
        user_preferences: Optional structured preferences; only used by
            the final fallback.
        top_k: Number of recommendations to return.

    Returns:
        A non-empty list of recommendation dicts.

    Raises:
        RuntimeError: If every system yields no results or any step raises;
            the original exception is chained as the cause.
    """
    try:
        print("Initializing Enhanced SemanticBreedRecommender...")
        recommender = SemanticBreedRecommender()

        # Load the SBERT model if it has not been loaded yet.
        if not recommender.sbert_model:
            recommender._initialize_model()

        # First choice: enhanced recommendations with unified scoring.
        print("Using enhanced recommendation system with unified scoring")
        unified = recommender.get_enhanced_recommendations_with_unified_scoring(user_description, top_k)
        if unified:
            print(f"Generated {len(unified)} enhanced recommendations successfully")
            return unified

        # Second choice: the original enhanced semantic system.
        print("Enhanced unified system returned no results, trying original enhanced system")
        enhanced = recommender.get_enhanced_semantic_recommendations(user_description, top_k)
        if enhanced:
            return enhanced

        # Last resort: the standard system.
        print("All enhanced systems failed, using standard system")
        if user_preferences:
            standard = recommender.get_hybrid_recommendations(user_description, user_preferences, top_k)
        else:
            standard = recommender.get_semantic_recommendations(user_description, top_k)

        if not standard:
            error_msg = "All recommendation systems failed to generate results. Please check your input description and try again. Error details may be in the console."
            print(f"ERROR: {error_msg}")
            raise RuntimeError(error_msg)
        return standard
    except Exception as e:
        error_msg = f"Critical error in recommendation system: {str(e)}. Please check your input and system configuration."
        print(f"ERROR: {error_msg}")
        print(traceback.format_exc())
        raise RuntimeError(error_msg) from e
def get_enhanced_recommendations_with_unified_scoring(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """Simplified semantic recommendation entry point.

    Encodes the description with SBERT, compares it against every breed
    vector, refines each similarity with the enhanced matching score and
    returns the best ``top_k`` breeds ranked by final score. When the SBERT
    model cannot be loaded, basic text matching is used instead.

    Args:
        user_description: The user's natural-language description.
        top_k: Number of recommendations to return.

    Returns:
        Recommendation dicts ordered by descending 'final_score'.

    Raises:
        RuntimeError: If any step fails; the original exception is chained.
    """
    try:
        print(f"Processing description-based recommendation: {user_description[:50]}...")
        recommender = SemanticBreedRecommender()

        # Load the SBERT model if it has not been loaded yet.
        if not recommender.sbert_model:
            recommender._initialize_model()
        if not recommender.sbert_model:
            print("SBERT model not available, using basic text matching...")
            return _get_basic_text_matching_recommendations(user_description, top_k)

        # Make sure the breed vectors exist before comparing.
        if not recommender.breed_vectors:
            recommender._build_breed_vectors()

        query_vector = recommender.sbert_model.encode(user_description)

        # Score every breed: (name, enhanced score dict, breed info, similarity).
        scored = []
        for name, vector in recommender.breed_vectors.items():
            sim = cosine_similarity([query_vector], [vector.embedding])[0][0]
            info = get_dog_description(name) or {}
            detail = _calculate_enhanced_matching_score(name, info, user_description, sim)
            scored.append((name, detail, info, sim))

        # Rank by the enhanced final score rather than raw similarity.
        ranked = sorted(scored, key=lambda entry: entry[1]['final_score'], reverse=True)[:top_k]

        recommendations = [
            {
                'breed': name.replace('_', ' '),
                'rank': position,
                'overall_score': detail['final_score'],
                'final_score': detail['final_score'],
                'semantic_score': sim,
                'comparative_bonus': detail['lifestyle_bonus'],
                'lifestyle_bonus': detail['lifestyle_bonus'],
                'size': info.get('Size', 'Unknown'),
                'temperament': info.get('Temperament', 'Unknown'),
                'exercise_needs': info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': info.get('Grooming Needs', 'Moderate'),
                'good_with_children': info.get('Good with Children', 'Unknown'),
                'lifespan': info.get('Lifespan', '10-12 years'),
                'description': info.get('Description', 'No description available'),
                'search_type': 'description',
                'scores': detail['dimension_scores'],
            }
            for position, (name, detail, info, sim) in enumerate(ranked, start=1)
        ]
        print(f"Generated {len(recommendations)} semantic recommendations")
        return recommendations
    except Exception as e:
        error_msg = f"Error in semantic recommendation system: {str(e)}. Please check your input and try again."
        print(f"ERROR: {error_msg}")
        print(traceback.format_exc())
        raise RuntimeError(error_msg) from e
def _calculate_enhanced_matching_score(breed: str, breed_info: dict, user_description: str, base_similarity: float) -> dict:
    """Score how well a breed fits a free-text user description.

    Combines per-dimension compatibility scores (space, exercise, noise,
    family, size, grooming) into a weighted score, blends it with the
    semantic similarity, applies the hard-constraint penalty and finally
    floors/caps the result.

    Args:
        breed: Breed name (used for the noise lookup and the floor choice).
        breed_info: Breed description dict; missing fields default to ''.
        user_description: Raw user description (lower-cased internally).
        base_similarity: Semantic similarity between description and breed.

    Returns:
        Dict with 'final_score', 'weighted_score', 'lifestyle_bonus',
        'dimension_scores' and 'constraint_penalty'. If anything raises,
        a fallback dict derived from ``base_similarity`` is returned.
    """
    try:
        user_desc = user_description.lower()

        # Requirements parsed from the user's description.
        space_requirements = _analyze_space_requirements(user_desc)
        exercise_requirements = _analyze_exercise_requirements(user_desc)
        noise_requirements = _analyze_noise_requirements(user_desc)
        size_requirements = _analyze_size_requirements(user_desc)
        family_requirements = _analyze_family_requirements(user_desc)

        # Breed traits.
        breed_size = breed_info.get('Size', '').lower()
        breed_exercise = breed_info.get('Exercise Needs', '').lower()
        breed_noise = breed_noise_info.get(breed, {}).get('noise_level', 'moderate').lower()
        breed_temperament = breed_info.get('Temperament', '').lower()
        breed_good_with_children = breed_info.get('Good with Children', '').lower()

        # Per-dimension compatibility.
        space_score = _calculate_space_compatibility(space_requirements, breed_size, breed_exercise)
        exercise_score = _calculate_exercise_compatibility(exercise_requirements, breed_exercise)
        noise_score = _calculate_noise_compatibility(noise_requirements, breed_noise)
        size_score = _calculate_size_compatibility(size_requirements, breed_size)
        family_score = _calculate_family_compatibility(family_requirements, breed_good_with_children, breed_temperament)
        # Grooming and experience have no dedicated analysis; they are
        # approximated from the semantic similarity.
        grooming_score = min(0.9, base_similarity + 0.1)

        dimension_scores = {
            'space': space_score,
            'exercise': exercise_score,
            'noise': noise_score,
            'grooming': grooming_score,
            'family': family_score,
            'experience': min(0.9, base_similarity + 0.05),
        }

        # Penalty (or small bonus) from hard constraints in the description.
        constraint_penalty = _apply_hard_constraints_enhanced(user_desc, breed_info)

        # Weighted total; the weights sum to 1.0.
        weighted_score = (
            space_score * 0.30 +
            exercise_score * 0.28 +
            noise_score * 0.18 +
            family_score * 0.12 +
            size_score * 0.08 +
            grooming_score * 0.04
        )

        # Tiered bonus when the core dimensions all match well.
        perfect_match_bonus = 0.0
        if space_score >= 0.88 and exercise_score >= 0.88 and noise_score >= 0.85:
            perfect_match_bonus = 0.08
        elif space_score >= 0.82 and exercise_score >= 0.82 and noise_score >= 0.75:
            perfect_match_bonus = 0.04
        elif space_score >= 0.75 and exercise_score >= 0.75:
            perfect_match_bonus = 0.02

        # 75% dimension match + 25% semantic similarity, plus the bonus.
        base_combined_score = (weighted_score * 0.75 + base_similarity * 0.25) + perfect_match_bonus
        raw_final_score = base_combined_score + constraint_penalty

        # Guaranteed floor; highly adaptable breeds get a higher one.
        base_guaranteed_score = 0.42
        high_adaptability_breeds = ['French_Bulldog', 'Pug', 'Golden_Retriever', 'Labrador_Retriever']
        # The loop variable must not be named `breed`: shadowing it turned
        # this into `breed in breed`, which is always true and gave every
        # breed the higher floor.
        if any(adaptable in breed for adaptable in high_adaptability_breeds):
            base_guaranteed_score = 0.45

        if raw_final_score >= base_guaranteed_score:
            # Mildly compress very high scores to avoid clustering at the top.
            if raw_final_score > 0.85:
                compression_factor = 0.92
                final_score = 0.85 + (raw_final_score - 0.85) * compression_factor
            else:
                final_score = raw_final_score
            final_score = min(0.93, final_score)
        else:
            # Blend floor (75%) with the raw score (25%), then clamp.
            # NOTE(review): the blend is always below the floor here, so
            # the max() below resolves to the floor itself.
            normalized_raw_score = max(0.15, raw_final_score)
            final_score = base_guaranteed_score * 0.75 + normalized_raw_score * 0.25
            final_score = max(base_guaranteed_score, min(0.93, final_score))

        lifestyle_bonus = max(0.0, weighted_score - base_similarity)

        return {
            'final_score': final_score,
            'weighted_score': weighted_score,
            'lifestyle_bonus': lifestyle_bonus,
            'dimension_scores': dimension_scores,
            'constraint_penalty': constraint_penalty
        }
    except Exception as e:
        # Best-effort fallback: derive every score from the similarity.
        print(f"Error in enhanced matching calculation for {breed}: {str(e)}")
        return {
            'final_score': base_similarity,
            'weighted_score': base_similarity,
            'lifestyle_bonus': 0.0,
            'dimension_scores': {
                'space': base_similarity * 0.9,
                'exercise': base_similarity * 0.85,
                'grooming': base_similarity * 0.8,
                'experience': base_similarity * 0.75,
                'noise': base_similarity * 0.7,
                'family': base_similarity * 0.65
            },
            'constraint_penalty': 0.0
        }
def _analyze_space_requirements(user_desc: str) -> dict:
    """Infer the user's living space from a lower-cased description.

    Rules are checked in priority order (apartment, medium house, large
    house); the first rule with any matching substring wins.

    Returns:
        Dict with 'type', 'size' and 'importance'; defaults to an unknown
        type with medium size and importance 0.5.
    """
    # (trigger substrings, space type, space size, importance)
    rules = (
        (('apartment', 'small apartment', 'small space', 'condo', 'flat'),
         'apartment', 'small', 0.95),
        (('medium-sized house', 'medium house', 'townhouse'),
         'medium_house', 'medium', 0.8),
        (('large house', 'big house', 'yard', 'garden', 'large space', 'backyard'),
         'house', 'large', 0.7),
    )
    for triggers, space_type, space_size, weight in rules:
        if any(trigger in user_desc for trigger in triggers):
            return {'type': space_type, 'size': space_size, 'importance': weight}
    return {'type': 'unknown', 'size': 'medium', 'importance': 0.5}
def _analyze_exercise_requirements(user_desc: str) -> dict:
    """Infer the user's activity level from a lower-cased description.

    Checks run in priority order: low, moderate, high. The moderate rule
    needs both a moderate phrase and a mention of 'walk' or 'daily'. When
    only the phrase is present the high-activity rule is still evaluated,
    so "hiking ... balanced" is not silently treated as the default.

    Returns:
        Dict with 'level' ('low' | 'moderate' | 'high') and 'importance'.
        Defaults to moderate with importance 0.5 when nothing matches.
    """
    low_terms = ("don't exercise", "don't exercise much", "low exercise", "minimal", "lazy", "not active")
    moderate_terms = ('30 minutes', 'half hour', 'moderate', 'balanced', 'walk about')
    high_terms = ('active', 'hiking', 'outdoor activities', 'running', 'outdoors', 'love hiking')

    requirements = {'level': 'moderate', 'importance': 0.5}

    # Low is checked first so that "not active" is not caught by "active".
    if any(term in user_desc for term in low_terms):
        requirements['level'] = 'low'
        requirements['importance'] = 0.95
    # Both conditions live in the same test so a partial match falls
    # through to the high-activity rule instead of ending the chain.
    elif (any(term in user_desc for term in moderate_terms)
          and ('walk' in user_desc or 'daily' in user_desc)):
        requirements['level'] = 'moderate'
        requirements['importance'] = 0.85
    elif any(term in user_desc for term in high_terms):
        requirements['level'] = 'high'
        requirements['importance'] = 0.9
    return requirements
def _analyze_noise_requirements(user_desc: str) -> dict:
    """Infer the user's noise tolerance from a lower-cased description.

    Quiet-seeking phrases take priority over noise-accepting ones.

    Returns:
        Dict with 'tolerance' ('low' | 'medium' | 'high') and 'importance'.
    """
    quiet_terms = ('quiet', 'no bark', "won't bark", "doesn't bark", 'silent', 'peaceful')
    loud_ok_terms = ('loud', 'barking ok', 'noise ok')

    if any(term in user_desc for term in quiet_terms):
        return {'tolerance': 'low', 'importance': 0.9}
    if any(term in user_desc for term in loud_ok_terms):
        return {'tolerance': 'high', 'importance': 0.7}
    return {'tolerance': 'medium', 'importance': 0.5}
def _analyze_size_requirements(user_desc: str) -> dict:
    """Infer the preferred dog size from a lower-cased description.

    Small-size words take priority over large-size words.

    Returns:
        Dict with 'preferred' ('small' | 'large' | 'any') and 'importance'.
    """
    small_words = ('small', 'tiny', 'little', 'lap dog', 'compact')
    large_words = ('large', 'big', 'giant')

    if any(word in user_desc for word in small_words):
        preferred, weight = 'small', 0.8
    elif any(word in user_desc for word in large_words):
        preferred, weight = 'large', 0.8
    else:
        preferred, weight = 'any', 0.5
    return {'preferred': preferred, 'importance': weight}
def _analyze_family_requirements(user_desc: str) -> dict:
    """Detect whether a lower-cased description mentions children or family.

    Returns:
        Dict with 'children' (bool) and 'importance' (0.8 when a family
        term is present, 0.3 otherwise).
    """
    family_terms = ('children', 'kids', 'family', 'child')
    mentions_family = any(term in user_desc for term in family_terms)
    return {
        'children': mentions_family,
        'importance': 0.8 if mentions_family else 0.3,
    }
def _calculate_space_compatibility(space_req: dict, breed_size: str, breed_exercise: str) -> float:
    """Score how well a breed's size suits the user's living space.

    Args:
        space_req: Space requirements; only 'type' is read ('apartment',
            'medium_house', anything else is treated as a large house).
        breed_size: Lower-cased size text of the breed.
        breed_exercise: Lower-cased exercise-needs text of the breed.

    Returns:
        A score between 0.01 and 0.95.
    """
    living_space = space_req['type']

    # Size classes are tested in a fixed order: small/toy, medium, large,
    # giant; the first one found in the size text decides the score.
    is_compact = 'small' in breed_size or 'toy' in breed_size
    is_medium = 'medium' in breed_size
    is_large = 'large' in breed_size
    is_giant = 'giant' in breed_size

    if living_space == 'apartment':
        if is_compact:
            score = 0.95
        elif is_medium:
            # Medium dogs only work in apartments when low-energy.
            score = 0.75 if 'low' in breed_exercise else 0.45
        elif is_large:
            score = 0.05
        elif is_giant:
            score = 0.01
        else:
            score = 0.7
    elif living_space == 'medium_house':
        if is_compact:
            score = 0.9
        elif is_medium:
            score = 0.95
        elif is_large:
            # Large dogs fit better when they are not high-energy.
            calm = 'moderate' in breed_exercise or 'low' in breed_exercise
            score = 0.8 if calm else 0.6
        elif is_giant:
            score = 0.3
        else:
            score = 0.85
    else:
        # Large house (or unspecified space).
        if is_compact:
            score = 0.85
        elif is_medium:
            score = 0.9
        elif is_large or is_giant:
            score = 0.95
        else:
            score = 0.8

    return min(0.95, score)
def _calculate_exercise_compatibility(exercise_req: dict, breed_exercise: str) -> float:
    """Score how well a breed's exercise needs match the user's activity level.

    Args:
        exercise_req: Exercise requirements; only 'level' is read ('low',
            'high', anything else is treated as moderate).
        breed_exercise: Lower-cased exercise-needs text of the breed.

    Returns:
        The score of the first matching marker for the user's level, or
        that level's fallback when no marker is found.
    """
    # Per level: ordered (markers, score) pairs; the order matters for
    # compound texts such as "moderate to high".
    moderate_ladder = ((('moderate',), 0.95), (('low',), 0.85), (('high',), 0.5))
    ladders = {
        'low': ((('low', 'minimal'), 0.95), (('moderate',), 0.5), (('high',), 0.1)),
        'high': ((('high',), 0.95), (('moderate',), 0.8), (('low',), 0.6)),
    }
    fallbacks = {'low': 0.7, 'high': 0.7}

    level = exercise_req['level']
    for markers, score in ladders.get(level, moderate_ladder):
        if any(marker in breed_exercise for marker in markers):
            return score
    return fallbacks.get(level, 0.75)
def _calculate_noise_compatibility(noise_req: dict, breed_noise: str) -> float:
    """Score a breed's noise level against the user's noise tolerance.

    Handles compound levels such as "low-moderate" for users with low
    tolerance.

    Args:
        noise_req: Noise requirements; only 'tolerance' is read ('low',
            'high', anything else is treated as moderate tolerance).
        breed_noise: Noise-level text of the breed (case-insensitive).

    Returns:
        A compatibility score between 0.1 and 0.95.
    """
    level = breed_noise.lower()
    tolerance = noise_req['tolerance']

    if tolerance == 'low':
        if 'low' in level and 'moderate' not in level:
            return 0.95  # purely low noise
        if 'low-moderate' in level or 'low to moderate' in level:
            return 0.8   # low-to-moderate is still acceptable
        if level == 'moderate':
            return 0.4   # exactly moderate is somewhat problematic
        if 'high' in level:
            return 0.1   # loud breeds are unsuitable
        return 0.6       # unknown level, conservative estimate

    if tolerance == 'high':
        # Quiet dogs are still fine for tolerant owners.
        ordered_scores = (('high', 0.9), ('moderate', 0.85), ('low', 0.8))
        fallback = 0.8
    else:
        ordered_scores = (('moderate', 0.9), ('low', 0.85), ('high', 0.6))
        fallback = 0.75

    return next((score for marker, score in ordered_scores if marker in level), fallback)
def _calculate_size_compatibility(size_req: dict, breed_size: str) -> float:
    """Score a breed's size against the user's size preference.

    Args:
        size_req: Size requirements; only 'preferred' is read ('small',
            'large', anything else means no particular preference).
        breed_size: Lower-cased size text of the breed.

    Returns:
        0.9 for a match, a middle score for medium breeds, a low score
        for the opposite size, or 0.7 when there is no preference.
    """
    preferred = size_req['preferred']
    if preferred == 'small':
        matching, hit, near, miss = ('small', 'toy', 'tiny'), 0.9, 0.6, 0.3
    elif preferred == 'large':
        matching, hit, near, miss = ('large', 'giant'), 0.9, 0.7, 0.4
    else:
        return 0.7  # no particular preference

    if any(word in breed_size for word in matching):
        return hit
    return near if 'medium' in breed_size else miss
def _calculate_family_compatibility(family_req: dict, good_with_children: str, temperament: str) -> float:
    """Score how suitable a breed is for a household with children.

    The explicit "good with children" field is matched on whole words, so
    values such as "Unknown" (which contains the letters "no") are no
    longer mistaken for a negative answer. An explicit yes/no always
    outranks temperament keywords.

    Args:
        family_req: Family requirements; only 'children' is read.
        good_with_children: The breed's child-compatibility text.
        temperament: The breed's temperament text.

    Returns:
        0.7 when children are not a factor; otherwise 0.9 for an explicit
        yes, 0.2 for an explicit no/not, 0.8 when the temperament mentions
        gentle/patient/friendly, and 0.6 when nothing is known.
    """
    if not family_req['children']:
        return 0.7

    # Whole-word tokens avoid substring false positives ("unknown" -> "no").
    answer_words = set(re.findall(r'[a-z]+', (good_with_children or '').lower()))
    if 'yes' in answer_words:
        return 0.9
    if answer_words & {'no', 'not'}:
        return 0.2

    temperament_text = (temperament or '').lower()
    if any(word in temperament_text for word in ('gentle', 'patient', 'friendly')):
        return 0.8
    return 0.6
def _apply_hard_constraints_enhanced(user_desc: str, breed_info: dict) -> float:
    """Compute a breed-aware penalty for hard constraints in the description.

    Penalties are accumulated for apartment living, low activity, quiet
    requirements, small-size preference and moderate-activity lifestyles.
    Selected breeds earn an adaptability bonus that offsets part of the
    penalty. The combined value is floored at -0.4.

    All breed names are compared in underscore form. The breed name is
    read from ``breed_info['Breed']`` with spaces replaced by underscores,
    and the comparison lists use the same form, so multi-word breeds such
    as Great Dane or Border Collie actually match.

    Args:
        user_desc: Lower-cased user description.
        breed_info: Breed description dict; reads 'Size', 'Exercise Needs'
            and 'Breed'. NOTE(review): when 'Breed' is absent every
            name-based rule and the noise lookup fall back to defaults;
            confirm the description data carries this key.

    Returns:
        A value >= -0.4; negative values penalise, positive values reward.
    """
    penalty = 0.0
    # Softens the base penalties for space/exercise/noise mismatches.
    penalty_decay_factor = 0.7
    breed_adaptability_bonus = 0.0

    breed_size = breed_info.get('Size', '').lower()
    breed_exercise = breed_info.get('Exercise Needs', '').lower()
    breed_name = breed_info.get('Breed', '').replace(' ', '_')

    def name_matches(candidates) -> bool:
        """True when any candidate (underscore form) occurs in the breed name."""
        return any(candidate.replace(' ', '_') in breed_name for candidate in candidates)

    # Apartment living.
    if 'apartment' in user_desc:
        if 'giant' in breed_size:
            # Relatively calm giant breeds get part of the penalty back.
            if name_matches(['Mastiff', 'Great_Dane']):
                breed_adaptability_bonus += 0.08
            penalty += -0.35 * penalty_decay_factor
        elif 'large' in breed_size:
            # Large breeds known to cope with apartments.
            if name_matches(['Greyhound', 'Great_Dane']):
                breed_adaptability_bonus += 0.06
            penalty += -0.25 * penalty_decay_factor
        elif 'medium' in breed_size and 'high' in breed_exercise:
            penalty += -0.15 * penalty_decay_factor

    # Low-activity owner vs. breed exercise needs.
    if any(phrase in user_desc for phrase in ["don't exercise", "not active", "low exercise", "don't exercise much"]):
        if 'high' in breed_exercise:
            # Sprinter breeds that are calm between bursts of activity.
            if name_matches(['Greyhound', 'Whippet']):
                breed_adaptability_bonus += 0.10
            penalty += -0.28 * penalty_decay_factor
        elif 'moderate' in breed_exercise:
            penalty += -0.08 * penalty_decay_factor

    # Quiet requirement vs. breed noise level.
    if any(phrase in user_desc for phrase in ['quiet', "won't bark", "doesn't bark", "silent"]):
        breed_noise = breed_noise_info.get(breed_name, {}).get('noise_level', 'moderate').lower()
        if 'high' in breed_noise:
            # Vocal but highly trainable breeds.
            if name_matches(['German_Shepherd', 'Golden_Retriever']):
                breed_adaptability_bonus += 0.05
            penalty += -0.18 * penalty_decay_factor
        elif 'moderate' in breed_noise and 'low' not in breed_noise:
            penalty += -0.05 * penalty_decay_factor

    # Small-size preference.
    if any(phrase in user_desc for phrase in ['small', 'tiny', 'little']):
        if 'giant' in breed_size:
            penalty -= 0.35
        elif 'large' in breed_size:
            penalty -= 0.20

    # Moderate-activity lifestyle.
    moderate_activity_terms = ['30 minutes', 'half hour', 'moderate', 'balanced', 'medium-sized house']
    if any(term in user_desc for term in moderate_activity_terms):
        giant_breeds = ['Saint_Bernard', 'Tibetan_Mastiff', 'Great_Dane', 'Mastiff', 'Newfoundland']
        is_named_giant = name_matches(giant_breeds)

        # Giant breeds are penalised but not excluded outright.
        if is_named_giant or 'giant' in breed_size:
            penalty -= 0.35

        # Medium-sized house plus a giant breed, unless the user is active.
        if 'medium-sized house' in user_desc and is_named_giant:
            if not any(high_activity in user_desc for high_activity in ['hiking', 'running', 'active', 'outdoor activities']):
                penalty -= 0.15

        # A 30-minute walk is not enough for very high-energy breeds.
        if any(term in user_desc for term in ['30 minutes', 'half hour']) and 'walk' in user_desc:
            high_energy_breeds = ['Siberian_Husky', 'Border_Collie', 'Jack_Russell_Terrier', 'Weimaraner']
            if name_matches(high_energy_breeds) and 'high' in breed_exercise:
                penalty -= 0.25

    # Flat bonus for borderline-suitable breeds (exact name match).
    boundary_adaptable_breeds = {
        'Italian_Greyhound': 0.08,
        'Boston_Bull': 0.06,
        'Havanese': 0.05,
        'Silky_terrier': 0.04,
        'Basset': 0.07,
    }
    breed_adaptability_bonus += boundary_adaptable_breeds.get(breed_name, 0.0)

    # Cap the total so a single constraint cannot dominate the score.
    return max(-0.4, penalty + breed_adaptability_bonus)
def _get_basic_text_matching_recommendations(user_description: str, top_k: int = 15) -> List[Dict[str, Any]]:
    """Keyword-matching recommendations used when SBERT is unavailable.

    Loads up to 50 breed names from the local catalog database (falling
    back to a built-in list when the query fails or returns nothing),
    scores each breed by keyword overlap with the description, refines
    that with the enhanced matching score and returns the best ``top_k``.

    Args:
        user_description: The user's natural-language description. An
            empty or whitespace-only description yields the minimum
            keyword score instead of a division by zero.
        top_k: Number of recommendations to return.

    Returns:
        Recommendation dicts ordered by descending final score.

    Raises:
        RuntimeError: If scoring fails; the original exception is chained.
    """
    fallback_breeds = [
        'Labrador_Retriever', 'Golden_Retriever', 'German_Shepherd', 'French_Bulldog',
        'Border_Collie', 'Poodle', 'Beagle', 'Rottweiler', 'Yorkshire_Terrier',
        'Dachshund', 'Boxer', 'Siberian_Husky', 'Great_Dane', 'Pomeranian', 'Shih-Tzu',
        'Maltese_Dog', 'Chihuahua', 'Cavalier_King_Charles_Spaniel', 'Boston_Terrier',
        'Japanese_Spaniel', 'Toy_Terrier', 'Affenpinscher', 'Pekingese', 'Lhasa'
    ]
    try:
        print("Using basic text matching as fallback...")
        keywords = user_description.lower().split()

        # Load the breed list from the database.
        try:
            conn = sqlite3.connect('animal_detector.db')
            try:
                cursor = conn.cursor()
                cursor.execute("SELECT DISTINCT Breed FROM AnimalCatalog LIMIT 50")
                # Skip NULL/empty names; they cannot be scored.
                basic_breeds = [row[0] for row in cursor.fetchall() if row[0]]
                cursor.close()
            finally:
                # Close the connection even when the query raises.
                conn.close()
        except Exception as e:
            print(f"Could not load breed list from database: {str(e)}")
            basic_breeds = fallback_breeds

        # An empty catalog would otherwise produce no recommendations.
        if not basic_breeds:
            basic_breeds = fallback_breeds

        breed_scores = []
        for breed in basic_breeds:
            breed_info = get_dog_description(breed) or {}
            breed_text = f"{breed} {breed_info.get('Temperament', '')} {breed_info.get('Size', '')} {breed_info.get('Description', '')}".lower()

            # Share of description keywords found in the breed text.
            matches = sum(1 for keyword in keywords if keyword in breed_text)
            match_ratio = matches / len(keywords) if keywords else 0.0
            base_score = min(0.95, 0.3 + match_ratio * 0.6)

            enhanced_score = _calculate_enhanced_matching_score(
                breed, breed_info, user_description, base_score
            )
            breed_scores.append((breed, enhanced_score['final_score'], breed_info, enhanced_score))

        breed_scores.sort(key=lambda x: x[1], reverse=True)

        recommendations = []
        for i, (breed, final_score, breed_info, enhanced_score) in enumerate(breed_scores[:top_k]):
            recommendation = {
                'breed': breed.replace('_', ' '),
                'rank': i + 1,
                'overall_score': final_score,
                'final_score': final_score,
                'semantic_score': enhanced_score.get('weighted_score', final_score),
                'comparative_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'lifestyle_bonus': enhanced_score.get('lifestyle_bonus', 0.0),
                'size': breed_info.get('Size', 'Unknown'),
                'temperament': breed_info.get('Temperament', 'Unknown'),
                'exercise_needs': breed_info.get('Exercise Needs', 'Moderate'),
                'grooming_needs': breed_info.get('Grooming Needs', 'Moderate'),
                'good_with_children': breed_info.get('Good with Children', 'Unknown'),
                'lifespan': breed_info.get('Lifespan', '10-12 years'),
                'description': breed_info.get('Description', 'No description available'),
                'search_type': 'description',
                'scores': enhanced_score.get('dimension_scores', {
                    'space': final_score * 0.9,
                    'exercise': final_score * 0.85,
                    'grooming': final_score * 0.8,
                    'experience': final_score * 0.75,
                    'noise': final_score * 0.7,
                    'family': final_score * 0.65
                })
            }
            recommendations.append(recommendation)
        return recommendations
    except Exception as e:
        error_msg = f"Error in basic text matching: {str(e)}"
        print(f"ERROR: {error_msg}")
        raise RuntimeError(error_msg) from e