Spaces:

brestok
/

TraumaBackend

Running

App Files Files Community

brestok committed on Feb 19

Commit

97743c1

1 Parent(s): 20faa08

add top match

Browse files

Files changed (4) hide show

trauma/api/data/model.py +4 -2
trauma/api/message/ai/engine.py +36 -17
trauma/api/message/ai/openai_request.py +16 -0
trauma/api/message/ai/prompts.py +41 -4

trauma/api/data/model.py CHANGED Viewed

@@ -46,5 +46,7 @@ class EntityModel(MongoBaseModel):
 class EntityModelExtended(EntityModel):
     highlightedAgeGroup: AgeGroup
-    highlightedTreatmentArea: str
-    highlightedTreatmentMethod: str

 class EntityModelExtended(EntityModel):
+    # Entity fields inherited from EntityModel plus per-search highlight and ranking data.
+    # Age group picked for the entity by find_matching_age_group.
     highlightedAgeGroup: AgeGroup
+    # Result of choose_closest_treatment_area; None is allowed because the prompt
+    # tells the model to answer `null` when no treatment area is similar.
+    highlightedTreatmentArea: str | None
+    # Result of choose_closest_treatment_method; None for the same reason as above.
+    highlightedTreatmentMethod: str | None
+    # Set to True by set_entities_score when the relevance score is above 0.9.
+    topMatch: bool = False
+    # Relevance score assigned by set_entities_score; stays None until scoring runs.
+    score: float | None = None

trauma/api/message/ai/engine.py CHANGED Viewed

@@ -10,7 +10,7 @@ from trauma.api.message.ai.openai_request import (update_entity_data_with_ai,
                                                   generate_search_request,
                                                   generate_final_response, convert_value_to_embeddings,
                                                   choose_closest_treatment_method, choose_closest_treatment_area,
-                                                  check_is_valid_request, generate_invalid_response)
 from trauma.api.message.db_requests import (save_assistant_user_message,
                                             filter_entities_by_age,
                                             update_entity_data_obj, get_entity_by_index)
@@ -67,21 +67,40 @@ async def search_semantic_entities(
     ]
     filtered_results = sorted(filtered_results, key=lambda x: x["distance"])[:5]
     final_entities = await asyncio.gather(*[get_entity_by_index(i['index']) for i in filtered_results])
-    final_entities_extended = await asyncio.gather(
-        *[extended_entity_with_highlights(entity, entity_data) for entity in final_entities]
-    )
-    return final_entities_extended
-async def extended_entity_with_highlights(entity: EntityModel, entity_data: dict) -> EntityModelExtended:
-    age_group = find_matching_age_group(entity, entity_data)
-    treatment_area, treatment_method = await asyncio.gather(
-        choose_closest_treatment_area(entity.treatmentAreas, entity_data['treatmentArea']),
-        choose_closest_treatment_method(entity.treatmentMethods, entity_data['treatmentMethod'])
-    )
-    return EntityModelExtended(
-        **entity.to_mongo(),
-        highlightedAgeGroup=age_group,
-        highlightedTreatmentArea=treatment_area,
-        highlightedTreatmentMethod=treatment_method
-    )

                                                   generate_search_request,
                                                   generate_final_response, convert_value_to_embeddings,
                                                   choose_closest_treatment_method, choose_closest_treatment_area,
+                                                  check_is_valid_request, generate_invalid_response, set_entity_score)
 from trauma.api.message.db_requests import (save_assistant_user_message,
                                             filter_entities_by_age,
                                             update_entity_data_obj, get_entity_by_index)
     ]
     filtered_results = sorted(filtered_results, key=lambda x: x["distance"])[:5]
     final_entities = await asyncio.gather(*[get_entity_by_index(i['index']) for i in filtered_results])
+    final_entities_extended = await extended_entities_with_highlights(final_entities, entity_data)
+    final_entities_scored = await set_entities_score(final_entities_extended, search_request)
+    return final_entities_scored
+async def extended_entities_with_highlights(
+        entities: list[EntityModel],
+        entity_data: dict,
+) -> list[EntityModelExtended]:
+    """Attach age-group, treatment-area and treatment-method highlights to each entity.
+
+    The closest treatment area and treatment method are requested concurrently
+    for all entities, then paired back with their entities in input order.
+
+    Args:
+        entities: Entities to extend.
+        entity_data: Requested search data; the 'treatmentArea' and
+            'treatmentMethod' keys are read here.
+
+    Returns:
+        One EntityModelExtended per input entity, in the same order.
+    """
+    async def pick_highlights(candidate: EntityModel) -> tuple:
+        # Both lookups for a single entity run side by side.
+        area_and_method = await asyncio.gather(
+            choose_closest_treatment_area(candidate.treatmentAreas, entity_data['treatmentArea']),
+            choose_closest_treatment_method(candidate.treatmentMethods, entity_data['treatmentMethod']),
+        )
+        return tuple(area_and_method)
+
+    def extend(candidate: EntityModel, area, method) -> EntityModelExtended:
+        matched_age_group = find_matching_age_group(candidate, entity_data)
+        return EntityModelExtended(
+            **candidate.to_mongo(),
+            highlightedAgeGroup=matched_age_group,
+            highlightedTreatmentArea=area,
+            highlightedTreatmentMethod=method,
+        )
+
+    highlights = await asyncio.gather(*(pick_highlights(candidate) for candidate in entities))
+    return [extend(candidate, *pair) for candidate, pair in zip(entities, highlights)]
+async def set_entities_score(
+        entities: list[EntityModelExtended],
+        search_request: str,
+        *,
+        top_match_threshold: float = 0.9,
+        min_score: float = 0.75,
+) -> list[EntityModelExtended]:
+    """Score every entity against the search request and keep the relevant ones.
+
+    All entities are scored concurrently with ``set_entity_score``. A usable
+    score is stored on the entity, entities scoring above
+    ``top_match_threshold`` are flagged as ``topMatch``, and only entities
+    scoring above ``min_score`` are returned.
+
+    Args:
+        entities: Entities to score; they are updated in place.
+        search_request: Search request text the entities are scored against.
+        top_match_threshold: Exclusive lower bound for flagging ``topMatch``.
+        min_score: Exclusive lower bound for an entity to be returned.
+
+    Returns:
+        The entities whose score exceeds ``min_score``, highest score first.
+    """
+    raw_scores = await asyncio.gather(*[set_entity_score(entity, search_request) for entity in entities])
+    final_entities = []
+    for raw_score, entity in zip(raw_scores, entities):
+        # The score is presumably parsed from the model's JSON reply, so it can be
+        # missing or non-numeric; such an entity is dropped rather than letting a
+        # TypeError on the comparison abort the whole search.
+        try:
+            score = float(raw_score)
+        except (TypeError, ValueError):
+            continue
+        entity.score = score
+        if score > top_match_threshold:
+            entity.topMatch = True
+        if score > min_score:
+            final_entities.append(entity)
+    return sorted(final_entities, key=lambda x: x.score, reverse=True)

trauma/api/message/ai/openai_request.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import json
 from trauma.api.chat.dto import EntityData
 from trauma.api.message.ai.prompts import TraumaPrompts
 from trauma.core.config import settings
 from trauma.core.wrappers import openai_wrapper
@@ -134,3 +135,18 @@ async def generate_invalid_response(user_message: str, message_history: list[dic
         }
     ]
     return messages

 import json
 from trauma.api.chat.dto import EntityData
+from trauma.api.data.model import EntityModelExtended
 from trauma.api.message.ai.prompts import TraumaPrompts
 from trauma.core.config import settings
 from trauma.core.wrappers import openai_wrapper
         }
     ]
     return messages
+@openai_wrapper(is_json=True, return_='score')
+async def set_entity_score(entity: EntityModelExtended, search_request: str):
+    """Build the chat messages that ask the model for an entity's relevance score.
+
+    The entity is serialised to JSON without its list and contact fields and
+    substituted, together with the search request, into the
+    ``TraumaPrompts.set_entity_score`` template.
+
+    NOTE(review): the ``openai_wrapper`` decorator presumably sends these
+    messages to the model and returns the 'score' field of the JSON reply;
+    confirm against the wrapper.
+    """
+    omitted_fields = {"ageGroups", "treatmentAreas", "treatmentMethods", "contactDetails"}
+    entity_json = entity.model_dump_json(exclude=omitted_fields)
+    # Same substitution order as before: the entity first, then the search request.
+    prompt = TraumaPrompts.set_entity_score.replace("{entity}", entity_json)
+    prompt = prompt.replace("{search_request}", search_request)
+    return [{"role": "system", "content": prompt}]

trauma/api/message/ai/prompts.py CHANGED Viewed

@@ -193,7 +193,7 @@ Je bent verplicht om een beschrijving voor een kliniek te genereren op basis van
     choose_closest_treatment_area = """## Task
-You must determine the most semantically similar disorder or disease from the list of [treatment areas] to the requested disease [requested treatment area]. The most similar disease should be returned in the [result] field of the JSON.
 ## Data
@@ -217,10 +217,11 @@ You must determine the most semantically similar disorder or disease from the li
 ## Instructions for filling JSON
-- [result]: The item from the [treatment areas] list that is most semantically similar to the requested disease. The disease name in the result field must exactly match the name as it appears in the [treatment areas] list."""
     choose_closest_treatment_method = """## Task
-You must determine the most semantically similar treatment method from the list of [treatment methods] to the requested treatment method [requested treatment method]. The most similar treatment method should be returned in the [result] field of the JSON.
 ## Data
@@ -244,5 +245,41 @@ You must determine the most semantically similar treatment method from the list
 ## Instructions for filling JSON
-- [result]: The item from the [treatment methods] list that is most semantically similar to the requested treatment method. The treatment method name in the result field must exactly match the name as it appears in the [treatment methods] list."""

     choose_closest_treatment_area = """## Task
+You must determine the most semantically similar disorder or disease from the list of [treatment areas] to the requested disease [requested treatment area]. The most similar disease should be returned in the [result] field of the JSON. If there is no similar disease, you must save `null`.
 ## Data
 ## Instructions for filling JSON
+- [result]: The item from the [treatment areas] list that is most semantically similar to the requested disease. The disease name in the result field must exactly match the name as it appears in the [treatment areas] list. If there is no similar element from [treatment areas], you must save `null`.
+"""
     choose_closest_treatment_method = """## Task
+You must determine the most semantically similar treatment method from the list of [treatment methods] to the requested treatment method [requested treatment method]. The most similar treatment method should be returned in the [result] field of the JSON. If there is no similar treatment method, you must save `null`.
 ## Data
 ## Instructions for filling JSON
+- [result]: The item from the [treatment methods] list that is most semantically similar to the requested treatment method. The treatment method name in the result field must exactly match the name as it appears in the [treatment methods] list. If there is no similar element from [treatment methods], you must save `null`."""
+    # Prompt template for scoring one facility against a search request.
+    # `{search_request}` and `{entity}` are filled in with str.replace by the caller,
+    # so the literal braces of the JSON example below need no escaping.
+    # NOTE(review): the text reads "You must to assign" (grammar slip); left unchanged
+    # here because the prompt is runtime text sent to the model.
+    set_entity_score = """## Task
+You must to assign a **relevance score** to a facility based on a given search request. The relevance score should range between **0.00 and 1.00**, where **1.00** indicates a perfect match and **0.00** indicates no relevance.
+## Evaluation Criteria
+Analyze the entire `Facility` object with a focus on the following key fields:
+- **highlightedAgeGroup**: The target age group that the facility serves.
+- **highlightedTreatmentArea**: The primary area of treatment provided by the facility.
+- **highlightedTreatmentMethod**: The main treatment method used at the facility.
+- **description**: Any additional text that may indicate relevance to the search request.
+## Scoring Guidelines
+- Assign **higher scores** when the `highlightedAgeGroup`, `highlightedTreatmentArea`, `highlightedTreatmentMethod`, and `description` closely **match** the user’s query.
+- Apply **penalties** for mismatches, partial overlaps, or missing key attributes.
+## Input
+**Search request**
+```
+{search_request}
+```
+**Facility**:
+```json
+{entity}
+```
+## **Output Format**
+Your response must be in the following JSON format:
+```json
+{
+  "score": float
+}
+```
+- **score**: A floating-point number between **0.00 and 1.00**, representing the degree of relevance."""