Spaces:

AIdeaText
/

v3

Sleeping

App Files Community

AIdeaText committed on Dec 27, 2024

Commit

f9997f7

verified ·

1 Parent(s): 7eef08f

Update modules/studentact/current_situation_analysis.py

Browse files

Files changed (1) hide show

modules/studentact/current_situation_analysis.py +105 -28

modules/studentact/current_situation_analysis.py CHANGED Viewed

@@ -143,41 +143,121 @@ def analyze_clarity(doc):
         logger.error(f"Error en analyze_clarity: {str(e)}")
         return 0.0, {}
-def analyze_reference_clarity(doc):
     """
-    Analiza la claridad de las referencias en el texto
     """
     try:
-        # Contar referencias anafóricas
-        reference_count = 0
-        unclear_references = 0
         for token in doc:
-            # Detectar pronombres y determinantes
-            if token.pos_ in ['PRON', 'DET']:
-                reference_count += 1
-                # Verificar si tiene antecedente claro
-                has_antecedent = False
-                for ancestor in token.ancestors:
-                    if ancestor.pos_ == 'NOUN':
-                        has_antecedent = True
-                        break
-                if not has_antecedent:
-                    unclear_references += 1
-        # Calcular score
-        if reference_count == 0:
-            return 1.0  # No hay referencias = claridad máxima
-        clarity = 1.0 - (unclear_references / reference_count)
-        return max(0.0, min(1.0, clarity))
     except Exception as e:
-        logger.error(f"Error en analyze_reference_clarity: {str(e)}")
-        return 0.0
 def analyze_vocabulary_diversity(doc):
     """Análisis mejorado de la diversidad y calidad del vocabulario"""
     try:
@@ -547,9 +627,6 @@ def normalize_score(value, metric_type,
         logger.error(f"Error en normalize_score: {str(e)}")
         return 0.0
 ##############################################################
 # Funciones de generación de gráficos

         logger.error(f"Error en analyze_clarity: {str(e)}")
         return 0.0, {}
+###################################################################################3
def analyze_clarity(doc):
    """
    Score the clarity of a parsed text from four weighted sub-metrics.

    The sub-metrics are each passed through ``normalize_score`` with
    ``metric_type='clarity'`` and then combined with fixed weights:

    * sentence length (weight 0.3): average ``len(sent)`` per sentence
    * connectors (weight 0.3): weighted connector count per sentence
    * structural complexity (weight 0.2): ``VERB`` tokens per sentence
    * lexical density (weight 0.2): content words / alphabetic words

    Args:
        doc: Parsed document exposing ``sents`` and iterable tokens with
            ``pos_``, ``dep_`` and ``is_alpha`` attributes (presumably a
            spaCy ``Doc`` -- confirm against the caller).

    Returns:
        tuple: ``(clarity_score, details)`` where ``details`` maps
        ``length_score``, ``connector_score``, ``complexity_score``,
        ``density_score``, ``avg_sentence_length``,
        ``connectors_per_sentence`` and ``density`` to their values.
        Returns ``(0.0, {})`` when the document has no sentences or when
        any exception is raised (the error is logged, not propagated).
    """
    try:
        sentences = list(doc.sents)
        if not sentences:
            return 0.0, {}
        # Non-zero from here on, so the per-sentence divisions are safe.
        sentence_count = len(sentences)

        # 1. Sentence length.
        # NOTE(review): len(sent) counts every token in the sentence, which
        # presumably includes punctuation -- confirm whether a word-only
        # count is intended, since the target below is expressed in words.
        avg_length = sum(len(sent) for sent in sentences) / sentence_count
        length_score = normalize_score(
            value=avg_length,
            metric_type='clarity',
            optimal_length=20,  # An ideal sentence has ~20 words
            min_threshold=0.60,  # Consistent with METRIC_THRESHOLDS
            target_threshold=0.75  # Consistent with METRIC_THRESHOLDS
        )

        # 2. Connectors: a token counts only if both its part of speech and
        # its dependency label mark it as a linking element.
        connector_weights = {
            'CCONJ': 1.0,  # Coordinating conjunctions
            'SCONJ': 1.2,  # Subordinating conjunctions
            'ADV': 0.8     # Connective adverbs
        }
        connector_deps = ('cc', 'mark', 'advmod')
        connector_count = sum(
            connector_weights[token.pos_]
            for token in doc
            if token.pos_ in connector_weights and token.dep_ in connector_deps
        )
        connectors_per_sentence = connector_count / sentence_count
        connector_score = normalize_score(
            value=connectors_per_sentence,
            metric_type='clarity',
            optimal_connections=1.5,  # ~1.5 connectors per sentence is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 3. Structural complexity: verbs per sentence as a clause proxy.
        clause_count = sum(
            1 for sent in sentences for token in sent if token.pos_ == 'VERB'
        )
        complexity_raw = clause_count / sentence_count
        complexity_score = normalize_score(
            value=complexity_raw,
            metric_type='clarity',
            optimal_depth=2.0,  # ~2 clauses per sentence is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # 4. Lexical density. Content words are counted among alphabetic
        # tokens only, so the numerator is always a subset of the
        # denominator and the ratio stays within [0, 1]. Counting every
        # content-POS token against an alphabetic-only total could push
        # the ratio above 1.
        content_pos = ('NOUN', 'VERB', 'ADJ', 'ADV')
        alpha_tokens = [token for token in doc if token.is_alpha]
        total_words = len(alpha_tokens)
        content_words = sum(
            1 for token in alpha_tokens if token.pos_ in content_pos
        )
        density = content_words / total_words if total_words > 0 else 0
        # NOTE(review): the density target is passed via the
        # ``optimal_connections`` keyword; confirm normalize_score handles
        # a ratio target through that parameter as intended.
        density_score = normalize_score(
            value=density,
            metric_type='clarity',
            optimal_connections=0.6,  # 60% content words is optimal
            min_threshold=0.60,
            target_threshold=0.75
        )

        # Final weighted score.
        weights = {
            'length': 0.3,
            'connectors': 0.3,
            'complexity': 0.2,
            'density': 0.2
        }
        clarity_score = (
            weights['length'] * length_score +
            weights['connectors'] * connector_score +
            weights['complexity'] * complexity_score +
            weights['density'] * density_score
        )

        details = {
            'length_score': length_score,
            'connector_score': connector_score,
            'complexity_score': complexity_score,
            'density_score': density_score,
            'avg_sentence_length': avg_length,
            'connectors_per_sentence': connectors_per_sentence,
            'density': density
        }

        # Diagnostic logging of every sub-score and its raw input.
        logger.info(f"""
            Scores de Claridad:
            - Longitud: {length_score:.2f} (avg={avg_length:.1f} palabras)
            - Conectores: {connector_score:.2f} (avg={connectors_per_sentence:.1f} por oración)
            - Complejidad: {complexity_score:.2f} (avg={complexity_raw:.1f} cláusulas)
            - Densidad: {density_score:.2f} ({density*100:.1f}% palabras de contenido)
            - Score Final: {clarity_score:.2f}
        """)

        return clarity_score, details

    except Exception as e:
        # Best-effort metric: log and fall back to a neutral result rather
        # than propagate the failure to the caller.
        logger.error(f"Error en analyze_clarity: {str(e)}")
        return 0.0, {}
+##########################################################################3
 def analyze_vocabulary_diversity(doc):
     """Análisis mejorado de la diversidad y calidad del vocabulario"""
     try:
         logger.error(f"Error en normalize_score: {str(e)}")
         return 0.0
 ##############################################################
 # Funciones de generación de gráficos