talhasideline committed on
Commit
a6a1c25
·
verified ·
1 Parent(s): 8ea01ce

Update Original_OpenAPI_DB.py

Browse files
Files changed (1) hide show
  1. Original_OpenAPI_DB.py +4 -4
Original_OpenAPI_DB.py CHANGED
@@ -143,7 +143,7 @@ hockey_keywords = [
143
  "midfielder", "middenvelder", "forward", "aanvaller", "striker", "spits", "player", "speler",
144
  "corner", "short corner", "penalty corner", "strafcorner", "free hit", "vrije slag",
145
  "tackle", "marking", "defending", "attacking", "skills", "technique", "techniek", "improve",
146
- "tips", "advice", "help", "suggest", "recommendation", "better", "enhance"
147
  ]
148
 
149
  # Greetings for detection
@@ -316,7 +316,7 @@ def is_in_domain(prompt: str) -> bool:
316
  similarity = util.cos_sim(prompt_embedding, hockey_embedding).item()
317
  max_similarity = max(max_similarity, similarity)
318
 
319
- has_hockey_semantic = max_similarity > 0.35 # Slightly higher threshold
320
  logging.debug(f"Hockey domain check for '{prompt}': keywords={has_hockey_keywords}, "
321
  f"semantic={has_hockey_semantic} (score: {max_similarity:.3f})")
322
 
@@ -369,11 +369,11 @@ def is_greeting_or_vague(prompt: str, user_lang: str = "en") -> bool:
369
  return True
370
 
371
  # It's a greeting ONLY if:
372
- # 1. High ratio of greeting words AND
373
  # 2. No hockey keywords AND
374
  # 3. No semantic hockey content
375
  is_pure_greeting = (
376
- greeting_ratio > 0.7 and
377
  not has_hockey_keywords and
378
  not has_hockey_semantic
379
  )
 
143
  "midfielder", "middenvelder", "forward", "aanvaller", "striker", "spits", "player", "speler",
144
  "corner", "short corner", "penalty corner", "strafcorner", "free hit", "vrije slag",
145
  "tackle", "marking", "defending", "attacking", "skills", "technique", "techniek", "improve",
146
+ "tips", "advice", "help", "suggest", "better", "enhance"
147
  ]
148
 
149
  # Greetings for detection
 
316
  similarity = util.cos_sim(prompt_embedding, hockey_embedding).item()
317
  max_similarity = max(max_similarity, similarity)
318
 
319
+ has_hockey_semantic = max_similarity > 0.4 # Higher threshold to reduce false positives
320
  logging.debug(f"Hockey domain check for '{prompt}': keywords={has_hockey_keywords}, "
321
  f"semantic={has_hockey_semantic} (score: {max_similarity:.3f})")
322
 
 
369
  return True
370
 
371
  # It's a greeting ONLY if:
372
+ # 1. High ratio of greeting words (>50% for multi-word, or single greeting word) AND
373
  # 2. No hockey keywords AND
374
  # 3. No semantic hockey content
375
  is_pure_greeting = (
376
+ (greeting_ratio >= 0.5 or (total_words <= 2 and greeting_words)) and
377
  not has_hockey_keywords and
378
  not has_hockey_semantic
379
  )