JerLag committed on
Commit
9557430
·
verified ·
1 Parent(s): 4857f00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -46
app.py CHANGED
@@ -1,20 +1,31 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- NPS Assurance — Gradio (Paste-only)
4
- - Entrée: verbatims collés (1 ligne = 1 verbatim, score NPS optionnel après un séparateur, ex: "|")
5
- - Sorties: émotion (pos/neutre/neg), thématiques, occurrences, résumé Markdown, graphiques Plotly
6
- - IA (facultatif): OpenAI pour sentiment/thèmes/synthèse. Sans clé, fallback HuggingFace (si installé) puis règles lexicales.
7
- - Déployable tel quel sur Hugging Face Spaces (app_file = app.py)
 
8
  """
9
 
10
  import os, re, json, collections, tempfile, zipfile
11
  from typing import List, Dict, Optional
12
  import pandas as pd
13
- from unidecode import unidecode
14
  import gradio as gr
15
  import plotly.express as px
16
  import plotly.graph_objects as go
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  # ---------------- Thésaurus ASSURANCE ----------------
19
  THEMES = {
20
  "Remboursements santé":[r"\bremboursement[s]?\b", r"\bt[eé]l[eé]transmission\b", r"\bno[eé]mie\b",
@@ -60,8 +71,7 @@ INTENSIFIERS = [r"\btr[eè]s\b", r"\bvraiment\b", r"\bextr[eê]mement\b", r"\bhy
60
  DIMINISHERS = [r"\bun[e]?\s+peu\b", r"\bassez\b", r"\bplut[oô]t\b", r"\bl[eé]g[eè]rement\b"]
61
  INTENSIFIER_W, DIMINISHER_W = 1.5, 0.7
62
 
63
- # --------------- OpenAI (optionnel) ---------------
64
- # --- OpenAI (optionnel, robuste) ---
65
  OPENAI_AVAILABLE = False
66
  try:
67
  from openai import OpenAI
@@ -71,7 +81,6 @@ try:
71
  except Exception:
72
  OPENAI_AVAILABLE = False
73
 
74
-
75
  # ---------------- Utils ----------------
76
  def normalize(t:str)->str:
77
  if not isinstance(t,str): return ""
@@ -177,6 +186,32 @@ def oa_summary(nps:Optional[float], dist:Dict[str,int], themes_df:pd.DataFrame,
177
  if isinstance(j, dict): return ' '.join(str(v) for v in j.values())
178
  return None
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  # --------- Graphiques ----------
181
  def fig_nps_gauge(nps: Optional[float]) -> go.Figure:
182
  v = 0.0 if nps is None else float(nps)
@@ -202,7 +237,7 @@ def fig_theme_balance(themes_df: pd.DataFrame, k: int) -> go.Figure:
202
  fig = px.bar(d2, x="theme", y="count", color="type", barmode="stack", title=f"Top {k} thèmes — balance Pos/Neg")
203
  fig.update_layout(xaxis_tickangle=-30); return fig
204
 
205
- # --------- Analyse principale ----------
206
  def analyze_text(pasted_txt, has_sc, sep_chr,
207
  do_anonymize, use_oa_sent, use_oa_themes, use_oa_summary,
208
  oa_model, oa_temp, top_k):
@@ -214,22 +249,14 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
214
  if do_anonymize:
215
  df["comment"]=df["comment"].apply(anonymize)
216
 
217
- # après (on retombe sur HF/règles automatiquement)
218
  if (use_oa_sent or use_oa_themes or use_oa_summary) and not OPENAI_AVAILABLE:
219
  use_oa_sent = use_oa_themes = use_oa_summary = False
220
 
221
- # HF sentiment (optionnel)
222
- HF_AVAILABLE=False
223
- try:
224
- from transformers import pipeline
225
- hf_pipe = pipeline("text-classification",
226
- model="cmarkea/distilcamembert-base-sentiment",
227
- tokenizer="cmarkea/distilcamembert-base-sentiment")
228
- HF_AVAILABLE=True
229
- except Exception:
230
- HF_AVAILABLE=False
231
  def hf_sent(text:str):
232
- if not HF_AVAILABLE or not text.strip(): return None
233
  try:
234
  res=hf_pipe(text); lab=str(res[0]["label"]).lower(); p=float(res[0].get("score",0.5))
235
  if "1" in lab or "2" in lab: return {"label":"negatif","score":-4*p}
@@ -241,14 +268,15 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
241
  rows=[]
242
  theme_agg=collections.defaultdict(lambda:{"mentions":0,"pos":0,"neg":0})
243
  used_hf=False; used_oa=False
 
244
 
245
- for _, r in df.iterrows():
246
- cid=r["id"]; comment=normalize(str(r["comment"]))
247
 
248
  # Sentiment: OpenAI -> HF -> règles
249
  sent=None
250
  if use_oa_sent:
251
- sent=oa_sentiment(comment, oa_model, oa_temp); used_oa = used_oa or bool(sent)
252
  if not sent:
253
  hf=hf_sent(comment)
254
  if hf: sent=hf; used_hf=True
@@ -259,7 +287,7 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
259
  # Thèmes: regex (+ fusion OpenAI)
260
  themes, counts = detect_themes_regex(comment)
261
  if use_oa_themes:
262
- tjson=oa_themes(comment, oa_model, oa_temp)
263
  if isinstance(tjson, dict):
264
  used_oa=True
265
  for th, c in (tjson.get("counts",{}) or {}).items():
@@ -267,18 +295,37 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
267
  counts[th] = max(counts.get(th, 0), int(c))
268
  themes = [th for th, c in counts.items() if c > 0]
269
 
270
- bucket = nps_bucket(r.get("nps_score", None))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  for th, c in counts.items():
273
  theme_agg[th]["mentions"] += c
274
- if sent["label"] == "positive":
275
- theme_agg[th]["pos"] += 1
276
- elif sent["label"] == "negatif":
277
- theme_agg[th]["neg"] += 1
278
 
279
  rows.append({
280
- "id": cid, "nps_score": r.get("nps_score", None), "nps_bucket": bucket,
281
  "comment": comment,
 
 
 
 
 
282
  "sentiment_score": round(float(sent["score"]), 3),
283
  "sentiment_label": sent["label"],
284
  "sentiment_source": "openai" if (use_oa_sent and used_oa) else ("huggingface" if used_hf else "rules"),
@@ -287,7 +334,7 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
287
  })
288
 
289
  out_df=pd.DataFrame(rows)
290
- nps=compute_nps(df["nps_score"]) # peut être None si pas de scores
291
  dist=out_df["sentiment_label"].value_counts().to_dict()
292
 
293
  # Stats par thème
@@ -298,11 +345,12 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
298
  "net_sentiment":int(d["pos"]-d["neg"])})
299
  themes_df=pd.DataFrame(trs).sort_values(["total_mentions","net_sentiment"],ascending=[False,False])
300
 
301
- # Synthèse texte
302
  method = "OpenAI + HF + règles" if (use_oa_sent and used_hf) else ("OpenAI + règles" if use_oa_sent else ("HF + règles" if used_hf else "Règles"))
 
303
  lines=[ "# Synthèse NPS & ressentis clients",
304
  f"- **Méthode** : {method}",
305
- f"- **NPS global** : {nps:.1f}" if nps is not None else "- **NPS global** : n/a" ]
306
  if dist:
307
  tot=sum(dist.values()); pos=dist.get("positive",0); neg=dist.get("negatif",0); neu=dist.get("neutre",0)
308
  lines.append(f"- **Répartition émotions** : positive {pos}/{tot}, neutre {neu}/{tot}, négative {neg}/{tot}")
@@ -313,10 +361,10 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
313
  summary_md="\n".join(lines)
314
 
315
  if use_oa_summary:
316
- md = oa_summary(nps, dist, themes_df, oa_model, oa_temp)
317
  if md: summary_md = md + "\n\n---\n" + summary_md
318
 
319
- # Fichiers export
320
  tmpdir=tempfile.mkdtemp(prefix="nps_gradio_")
321
  enriched=os.path.join(tmpdir,"enriched_comments.csv")
322
  themes=os.path.join(tmpdir,"themes_stats.csv")
@@ -330,14 +378,7 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
330
  z.write(themes,arcname="themes_stats.csv")
331
  z.write(summ,arcname="summary.md")
332
 
333
- # Graphiques
334
- fig_gauge = fig_nps_gauge(nps)
335
- fig_emots = fig_sentiment_bar(dist)
336
- k = max(1, int(top_k or 10))
337
- fig_top = fig_top_themes(themes_df, k)
338
- fig_bal = fig_theme_balance(themes_df, k)
339
-
340
- # Panneaux (rapide)
341
  def make_panels(dfT: pd.DataFrame):
342
  if dfT is None or dfT.empty: return "—","—","—"
343
  pos_top = dfT.sort_values(["verbatims_pos","total_mentions"], ascending=[False,False]).head(4)
@@ -362,13 +403,18 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
362
  return ench_md, irr_md, "\n".join(rec_lines)
363
 
364
  ench_md, irr_md, reco_md = make_panels(themes_df)
 
 
 
 
 
365
 
366
  return (summary_md, themes_df.head(100), out_df.head(200), [enriched, themes, summ, zip_path],
367
  ench_md, irr_md, reco_md, fig_gauge, fig_emots, fig_top, fig_bal)
368
 
369
  # ---------------- UI ----------------
370
  with gr.Blocks(title="NPS — Analyse (Assurance)") as demo:
371
- gr.Markdown("## 🔎 NPS — Analyse sémantique (Assurance)\nColle tes verbatims (1 par ligne). Option: score NPS après un `|`.")
372
 
373
  with gr.Column():
374
  pasted = gr.Textbox(label="Verbatims (un par ligne)", lines=10,
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ NPS Assurance — Gradio (Paste-only, NPS inféré)
4
+ - Entrée : verbatims collés (1 ligne = 1 verbatim, score NPS optionnel après un séparateur, ex: "|")
5
+ - Sorties : émotion (pos/neutre/neg), thématiques, occurrences, résumé Markdown, graphiques Plotly
6
+ - IA (facultatif) : OpenAI pour sentiment/thèmes/synthèse (fallback vers HuggingFace si dispo, puis règles lexicales)
7
+ - NPS inféré : si une note manque, elle est déduite du sentiment (cohérente avec NPS)
8
+ - Tolérant : si OpenAI ou unidecode ne sont pas installés, l’app continue avec des fallback
9
  """
10
 
11
  import os, re, json, collections, tempfile, zipfile
12
  from typing import List, Dict, Optional
13
  import pandas as pd
 
14
  import gradio as gr
15
  import plotly.express as px
16
  import plotly.graph_objects as go
17
 
18
+ # --- unidecode (fallback si paquet absent) ---
19
+ try:
20
+ from unidecode import unidecode
21
+ except Exception:
22
+ import unicodedata
23
+ def unidecode(x):
24
+ try:
25
+ return unicodedata.normalize('NFKD', str(x)).encode('ascii','ignore').decode('ascii')
26
+ except Exception:
27
+ return str(x)
28
+
29
  # ---------------- Thésaurus ASSURANCE ----------------
30
  THEMES = {
31
  "Remboursements santé":[r"\bremboursement[s]?\b", r"\bt[eé]l[eé]transmission\b", r"\bno[eé]mie\b",
 
71
  DIMINISHERS = [r"\bun[e]?\s+peu\b", r"\bassez\b", r"\bplut[oô]t\b", r"\bl[eé]g[eè]rement\b"]
72
  INTENSIFIER_W, DIMINISHER_W = 1.5, 0.7
73
 
74
+ # --------------- OpenAI (optionnel, robuste) ---------------
 
75
  OPENAI_AVAILABLE = False
76
  try:
77
  from openai import OpenAI
 
81
  except Exception:
82
  OPENAI_AVAILABLE = False
83
 
 
84
  # ---------------- Utils ----------------
85
  def normalize(t:str)->str:
86
  if not isinstance(t,str): return ""
 
186
  if isinstance(j, dict): return ' '.join(str(v) for v in j.values())
187
  return None
188
 
189
+ # --------- HuggingFace sentiment (optionnel)
190
+ def make_hf_pipe():
191
+ try:
192
+ from transformers import pipeline
193
+ return pipeline("text-classification",
194
+ model="cmarkea/distilcamembert-base-sentiment",
195
+ tokenizer="cmarkea/distilcamembert-base-sentiment")
196
+ except Exception:
197
+ return None
198
+
199
+ # --------- Inférence de note NPS à partir du sentiment ----------
200
+ def infer_nps_from_sentiment(label: str, score: float) -> int:
201
+ """
202
+ label: 'positive' | 'neutre' | 'negatif'
203
+ score: [-4..+4]
204
+ Retourne 0..10 aligné avec NPS (detractor ≤6, passive 7–8, promoter ≥9).
205
+ """
206
+ # Mise à l’échelle douce: [-4..+4] -> [0..10]
207
+ scaled = int(round((float(score) + 4.0) * 1.25)) # -4 -> 0, 0 -> 5, +4 -> 10
208
+ scaled = max(0, min(10, scaled))
209
+ if label == "positive":
210
+ return max(9, scaled) # garantir promoter
211
+ if label == "negatif":
212
+ return min(6, scaled) # garantir detractor
213
+ return 8 if score >= 0 else 7 # neutre
214
+
215
  # --------- Graphiques ----------
216
  def fig_nps_gauge(nps: Optional[float]) -> go.Figure:
217
  v = 0.0 if nps is None else float(nps)
 
237
  fig = px.bar(d2, x="theme", y="count", color="type", barmode="stack", title=f"Top {k} thèmes — balance Pos/Neg")
238
  fig.update_layout(xaxis_tickangle=-30); return fig
239
 
240
+ # --------- Analyse principale (paste-only) ----------
241
  def analyze_text(pasted_txt, has_sc, sep_chr,
242
  do_anonymize, use_oa_sent, use_oa_themes, use_oa_summary,
243
  oa_model, oa_temp, top_k):
 
249
  if do_anonymize:
250
  df["comment"]=df["comment"].apply(anonymize)
251
 
252
+ # Si OpenAI indisponible, on bascule silencieusement
253
  if (use_oa_sent or use_oa_themes or use_oa_summary) and not OPENAI_AVAILABLE:
254
  use_oa_sent = use_oa_themes = use_oa_summary = False
255
 
256
+ hf_pipe = make_hf_pipe()
257
+
 
 
 
 
 
 
 
 
258
  def hf_sent(text:str):
259
+ if hf_pipe is None or not text.strip(): return None
260
  try:
261
  res=hf_pipe(text); lab=str(res[0]["label"]).lower(); p=float(res[0].get("score",0.5))
262
  if "1" in lab or "2" in lab: return {"label":"negatif","score":-4*p}
 
268
  rows=[]
269
  theme_agg=collections.defaultdict(lambda:{"mentions":0,"pos":0,"neg":0})
270
  used_hf=False; used_oa=False
271
+ any_inferred=False
272
 
273
+ for idx, r in df.iterrows():
274
+ cid=r.get("id", idx+1); comment=normalize(str(r["comment"]))
275
 
276
  # Sentiment: OpenAI -> HF -> règles
277
  sent=None
278
  if use_oa_sent:
279
+ sent=oa_sentiment(comment, oa_model, float(oa_temp or 0.0)); used_oa = used_oa or bool(sent)
280
  if not sent:
281
  hf=hf_sent(comment)
282
  if hf: sent=hf; used_hf=True
 
287
  # Thèmes: regex (+ fusion OpenAI)
288
  themes, counts = detect_themes_regex(comment)
289
  if use_oa_themes:
290
+ tjson=oa_themes(comment, oa_model, float(oa_temp or 0.0))
291
  if isinstance(tjson, dict):
292
  used_oa=True
293
  for th, c in (tjson.get("counts",{}) or {}).items():
 
295
  counts[th] = max(counts.get(th, 0), int(c))
296
  themes = [th for th, c in counts.items() if c > 0]
297
 
298
+ # Note NPS : donnée ou inférée
299
+ given = r.get("nps_score", None)
300
+ try:
301
+ given = int(given) if given is not None and str(given).strip() != "" else None
302
+ except Exception:
303
+ given = None
304
+
305
+ if given is None:
306
+ inferred = infer_nps_from_sentiment(sent["label"], float(sent["score"]))
307
+ nps_final = inferred
308
+ nps_source = "inferred"
309
+ any_inferred = True
310
+ else:
311
+ nps_final = given
312
+ nps_source = "given"
313
+
314
+ bucket = nps_bucket(nps_final)
315
 
316
  for th, c in counts.items():
317
  theme_agg[th]["mentions"] += c
318
+ if sent["label"] == "positive": theme_agg[th]["pos"] += 1
319
+ elif sent["label"] == "negatif": theme_agg[th]["neg"] += 1
 
 
320
 
321
  rows.append({
322
+ "id": cid,
323
  "comment": comment,
324
+ "nps_score_given": given,
325
+ "nps_score_inferred": nps_final if given is None else None,
326
+ "nps_score_final": nps_final,
327
+ "nps_source": nps_source,
328
+ "nps_bucket": bucket,
329
  "sentiment_score": round(float(sent["score"]), 3),
330
  "sentiment_label": sent["label"],
331
  "sentiment_source": "openai" if (use_oa_sent and used_oa) else ("huggingface" if used_hf else "rules"),
 
334
  })
335
 
336
  out_df=pd.DataFrame(rows)
337
+ nps=compute_nps(out_df["nps_score_final"])
338
  dist=out_df["sentiment_label"].value_counts().to_dict()
339
 
340
  # Stats par thème
 
345
  "net_sentiment":int(d["pos"]-d["neg"])})
346
  themes_df=pd.DataFrame(trs).sort_values(["total_mentions","net_sentiment"],ascending=[False,False])
347
 
348
+ # Synthèse
349
  method = "OpenAI + HF + règles" if (use_oa_sent and used_hf) else ("OpenAI + règles" if use_oa_sent else ("HF + règles" if used_hf else "Règles"))
350
+ nps_label = "NPS global (inféré)" if any_inferred else "NPS global"
351
  lines=[ "# Synthèse NPS & ressentis clients",
352
  f"- **Méthode** : {method}",
353
+ f"- **{nps_label}** : {nps:.1f}" if nps is not None else f"- **{nps_label}** : n/a" ]
354
  if dist:
355
  tot=sum(dist.values()); pos=dist.get("positive",0); neg=dist.get("negatif",0); neu=dist.get("neutre",0)
356
  lines.append(f"- **Répartition émotions** : positive {pos}/{tot}, neutre {neu}/{tot}, négative {neg}/{tot}")
 
361
  summary_md="\n".join(lines)
362
 
363
  if use_oa_summary:
364
+ md = oa_summary(nps, dist, themes_df, oa_model, float(oa_temp or 0.0))
365
  if md: summary_md = md + "\n\n---\n" + summary_md
366
 
367
+ # Exports
368
  tmpdir=tempfile.mkdtemp(prefix="nps_gradio_")
369
  enriched=os.path.join(tmpdir,"enriched_comments.csv")
370
  themes=os.path.join(tmpdir,"themes_stats.csv")
 
378
  z.write(themes,arcname="themes_stats.csv")
379
  z.write(summ,arcname="summary.md")
380
 
381
+ # Panneaux & Graphes
 
 
 
 
 
 
 
382
  def make_panels(dfT: pd.DataFrame):
383
  if dfT is None or dfT.empty: return "—","—","—"
384
  pos_top = dfT.sort_values(["verbatims_pos","total_mentions"], ascending=[False,False]).head(4)
 
403
  return ench_md, irr_md, "\n".join(rec_lines)
404
 
405
  ench_md, irr_md, reco_md = make_panels(themes_df)
406
+ fig_gauge = fig_nps_gauge(nps)
407
+ fig_emots = fig_sentiment_bar(dist)
408
+ k = max(1, int(top_k or 10))
409
+ fig_top = fig_top_themes(themes_df, k)
410
+ fig_bal = fig_theme_balance(themes_df, k)
411
 
412
  return (summary_md, themes_df.head(100), out_df.head(200), [enriched, themes, summ, zip_path],
413
  ench_md, irr_md, reco_md, fig_gauge, fig_emots, fig_top, fig_bal)
414
 
415
  # ---------------- UI ----------------
416
  with gr.Blocks(title="NPS — Analyse (Assurance)") as demo:
417
+ gr.Markdown("## 🔎 NPS — Analyse sémantique (Assurance)\nCollez vos verbatims (1 par ligne). Option : note NPS après `|`.")
418
 
419
  with gr.Column():
420
  pasted = gr.Textbox(label="Verbatims (un par ligne)", lines=10,