Update app.py
Browse files
app.py
CHANGED
@@ -1,20 +1,31 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
-
NPS Assurance — Gradio (Paste-only)
|
4 |
-
- Entrée: verbatims collés (1 ligne = 1 verbatim, score NPS optionnel après un séparateur, ex: "|")
|
5 |
-
- Sorties: émotion (pos/neutre/neg), thématiques, occurrences, résumé Markdown, graphiques Plotly
|
6 |
-
- IA (facultatif): OpenAI pour sentiment/thèmes/synthèse
|
7 |
-
-
|
|
|
8 |
"""
|
9 |
|
10 |
import os, re, json, collections, tempfile, zipfile
|
11 |
from typing import List, Dict, Optional
|
12 |
import pandas as pd
|
13 |
-
from unidecode import unidecode
|
14 |
import gradio as gr
|
15 |
import plotly.express as px
|
16 |
import plotly.graph_objects as go
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
# ---------------- Thésaurus ASSURANCE ----------------
|
19 |
THEMES = {
|
20 |
"Remboursements santé":[r"\bremboursement[s]?\b", r"\bt[eé]l[eé]transmission\b", r"\bno[eé]mie\b",
|
@@ -60,8 +71,7 @@ INTENSIFIERS = [r"\btr[eè]s\b", r"\bvraiment\b", r"\bextr[eê]mement\b", r"\bhy
|
|
60 |
DIMINISHERS = [r"\bun[e]?\s+peu\b", r"\bassez\b", r"\bplut[oô]t\b", r"\bl[eé]g[eè]rement\b"]
|
61 |
INTENSIFIER_W, DIMINISHER_W = 1.5, 0.7
|
62 |
|
63 |
-
# --------------- OpenAI (optionnel) ---------------
|
64 |
-
# --- OpenAI (optionnel, robuste) ---
|
65 |
OPENAI_AVAILABLE = False
|
66 |
try:
|
67 |
from openai import OpenAI
|
@@ -71,7 +81,6 @@ try:
|
|
71 |
except Exception:
|
72 |
OPENAI_AVAILABLE = False
|
73 |
|
74 |
-
|
75 |
# ---------------- Utils ----------------
|
76 |
def normalize(t:str)->str:
|
77 |
if not isinstance(t,str): return ""
|
@@ -177,6 +186,32 @@ def oa_summary(nps:Optional[float], dist:Dict[str,int], themes_df:pd.DataFrame,
|
|
177 |
if isinstance(j, dict): return ' '.join(str(v) for v in j.values())
|
178 |
return None
|
179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
# --------- Graphiques ----------
|
181 |
def fig_nps_gauge(nps: Optional[float]) -> go.Figure:
|
182 |
v = 0.0 if nps is None else float(nps)
|
@@ -202,7 +237,7 @@ def fig_theme_balance(themes_df: pd.DataFrame, k: int) -> go.Figure:
|
|
202 |
fig = px.bar(d2, x="theme", y="count", color="type", barmode="stack", title=f"Top {k} thèmes — balance Pos/Neg")
|
203 |
fig.update_layout(xaxis_tickangle=-30); return fig
|
204 |
|
205 |
-
# --------- Analyse principale ----------
|
206 |
def analyze_text(pasted_txt, has_sc, sep_chr,
|
207 |
do_anonymize, use_oa_sent, use_oa_themes, use_oa_summary,
|
208 |
oa_model, oa_temp, top_k):
|
@@ -214,22 +249,14 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
214 |
if do_anonymize:
|
215 |
df["comment"]=df["comment"].apply(anonymize)
|
216 |
|
217 |
-
|
218 |
if (use_oa_sent or use_oa_themes or use_oa_summary) and not OPENAI_AVAILABLE:
|
219 |
use_oa_sent = use_oa_themes = use_oa_summary = False
|
220 |
|
221 |
-
|
222 |
-
|
223 |
-
try:
|
224 |
-
from transformers import pipeline
|
225 |
-
hf_pipe = pipeline("text-classification",
|
226 |
-
model="cmarkea/distilcamembert-base-sentiment",
|
227 |
-
tokenizer="cmarkea/distilcamembert-base-sentiment")
|
228 |
-
HF_AVAILABLE=True
|
229 |
-
except Exception:
|
230 |
-
HF_AVAILABLE=False
|
231 |
def hf_sent(text:str):
|
232 |
-
if
|
233 |
try:
|
234 |
res=hf_pipe(text); lab=str(res[0]["label"]).lower(); p=float(res[0].get("score",0.5))
|
235 |
if "1" in lab or "2" in lab: return {"label":"negatif","score":-4*p}
|
@@ -241,14 +268,15 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
241 |
rows=[]
|
242 |
theme_agg=collections.defaultdict(lambda:{"mentions":0,"pos":0,"neg":0})
|
243 |
used_hf=False; used_oa=False
|
|
|
244 |
|
245 |
-
for
|
246 |
-
cid=r
|
247 |
|
248 |
# Sentiment: OpenAI -> HF -> règles
|
249 |
sent=None
|
250 |
if use_oa_sent:
|
251 |
-
sent=oa_sentiment(comment, oa_model, oa_temp); used_oa = used_oa or bool(sent)
|
252 |
if not sent:
|
253 |
hf=hf_sent(comment)
|
254 |
if hf: sent=hf; used_hf=True
|
@@ -259,7 +287,7 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
259 |
# Thèmes: regex (+ fusion OpenAI)
|
260 |
themes, counts = detect_themes_regex(comment)
|
261 |
if use_oa_themes:
|
262 |
-
tjson=oa_themes(comment, oa_model, oa_temp)
|
263 |
if isinstance(tjson, dict):
|
264 |
used_oa=True
|
265 |
for th, c in (tjson.get("counts",{}) or {}).items():
|
@@ -267,18 +295,37 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
267 |
counts[th] = max(counts.get(th, 0), int(c))
|
268 |
themes = [th for th, c in counts.items() if c > 0]
|
269 |
|
270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
271 |
|
272 |
for th, c in counts.items():
|
273 |
theme_agg[th]["mentions"] += c
|
274 |
-
if sent["label"] == "positive":
|
275 |
-
|
276 |
-
elif sent["label"] == "negatif":
|
277 |
-
theme_agg[th]["neg"] += 1
|
278 |
|
279 |
rows.append({
|
280 |
-
"id": cid,
|
281 |
"comment": comment,
|
|
|
|
|
|
|
|
|
|
|
282 |
"sentiment_score": round(float(sent["score"]), 3),
|
283 |
"sentiment_label": sent["label"],
|
284 |
"sentiment_source": "openai" if (use_oa_sent and used_oa) else ("huggingface" if used_hf else "rules"),
|
@@ -287,7 +334,7 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
287 |
})
|
288 |
|
289 |
out_df=pd.DataFrame(rows)
|
290 |
-
nps=compute_nps(
|
291 |
dist=out_df["sentiment_label"].value_counts().to_dict()
|
292 |
|
293 |
# Stats par thème
|
@@ -298,11 +345,12 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
298 |
"net_sentiment":int(d["pos"]-d["neg"])})
|
299 |
themes_df=pd.DataFrame(trs).sort_values(["total_mentions","net_sentiment"],ascending=[False,False])
|
300 |
|
301 |
-
# Synthèse
|
302 |
method = "OpenAI + HF + règles" if (use_oa_sent and used_hf) else ("OpenAI + règles" if use_oa_sent else ("HF + règles" if used_hf else "Règles"))
|
|
|
303 |
lines=[ "# Synthèse NPS & ressentis clients",
|
304 |
f"- **Méthode** : {method}",
|
305 |
-
f"- **
|
306 |
if dist:
|
307 |
tot=sum(dist.values()); pos=dist.get("positive",0); neg=dist.get("negatif",0); neu=dist.get("neutre",0)
|
308 |
lines.append(f"- **Répartition émotions** : positive {pos}/{tot}, neutre {neu}/{tot}, négative {neg}/{tot}")
|
@@ -313,10 +361,10 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
313 |
summary_md="\n".join(lines)
|
314 |
|
315 |
if use_oa_summary:
|
316 |
-
md = oa_summary(nps, dist, themes_df, oa_model, oa_temp)
|
317 |
if md: summary_md = md + "\n\n---\n" + summary_md
|
318 |
|
319 |
-
#
|
320 |
tmpdir=tempfile.mkdtemp(prefix="nps_gradio_")
|
321 |
enriched=os.path.join(tmpdir,"enriched_comments.csv")
|
322 |
themes=os.path.join(tmpdir,"themes_stats.csv")
|
@@ -330,14 +378,7 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
330 |
z.write(themes,arcname="themes_stats.csv")
|
331 |
z.write(summ,arcname="summary.md")
|
332 |
|
333 |
-
#
|
334 |
-
fig_gauge = fig_nps_gauge(nps)
|
335 |
-
fig_emots = fig_sentiment_bar(dist)
|
336 |
-
k = max(1, int(top_k or 10))
|
337 |
-
fig_top = fig_top_themes(themes_df, k)
|
338 |
-
fig_bal = fig_theme_balance(themes_df, k)
|
339 |
-
|
340 |
-
# Panneaux (rapide)
|
341 |
def make_panels(dfT: pd.DataFrame):
|
342 |
if dfT is None or dfT.empty: return "—","—","—"
|
343 |
pos_top = dfT.sort_values(["verbatims_pos","total_mentions"], ascending=[False,False]).head(4)
|
@@ -362,13 +403,18 @@ def analyze_text(pasted_txt, has_sc, sep_chr,
|
|
362 |
return ench_md, irr_md, "\n".join(rec_lines)
|
363 |
|
364 |
ench_md, irr_md, reco_md = make_panels(themes_df)
|
|
|
|
|
|
|
|
|
|
|
365 |
|
366 |
return (summary_md, themes_df.head(100), out_df.head(200), [enriched, themes, summ, zip_path],
|
367 |
ench_md, irr_md, reco_md, fig_gauge, fig_emots, fig_top, fig_bal)
|
368 |
|
369 |
# ---------------- UI ----------------
|
370 |
with gr.Blocks(title="NPS — Analyse (Assurance)") as demo:
|
371 |
-
gr.Markdown("## 🔎 NPS — Analyse sémantique (Assurance)\
|
372 |
|
373 |
with gr.Column():
|
374 |
pasted = gr.Textbox(label="Verbatims (un par ligne)", lines=10,
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
+
NPS Assurance — Gradio (Paste-only, NPS inféré)
|
4 |
+
- Entrée : verbatims collés (1 ligne = 1 verbatim, score NPS optionnel après un séparateur, ex: "|")
|
5 |
+
- Sorties : émotion (pos/neutre/neg), thématiques, occurrences, résumé Markdown, graphiques Plotly
|
6 |
+
- IA (facultatif) : OpenAI pour sentiment/thèmes/synthèse (fallback vers HuggingFace si dispo, puis règles lexicales)
|
7 |
+
- NPS inféré : si une note manque, elle est déduite du sentiment (cohérente avec NPS)
|
8 |
+
- Tolérant : si OpenAI ou unidecode ne sont pas installés, l’app continue avec des fallback
|
9 |
"""
|
10 |
|
11 |
import os, re, json, collections, tempfile, zipfile
|
12 |
from typing import List, Dict, Optional
|
13 |
import pandas as pd
|
|
|
14 |
import gradio as gr
|
15 |
import plotly.express as px
|
16 |
import plotly.graph_objects as go
|
17 |
|
18 |
+
# --- unidecode (fallback si paquet absent) ---
|
19 |
+
try:
    from unidecode import unidecode
except Exception:  # deliberately broad: any import problem must not stop the app
    import unicodedata

    # Characters that NFKD cannot decompose into "ASCII letter + combining mark".
    # Without this table the ASCII encode step below would silently delete them
    # (e.g. "cœur" -> "cur", "l’app" -> "lapp"), which corrupts French verbatims.
    _ASCII_FALLBACKS = str.maketrans({
        "œ": "oe", "Œ": "OE",
        "æ": "ae", "Æ": "AE",
        "ß": "ss",
        "’": "'", "‘": "'",
        "“": '"', "”": '"',
        "–": "-", "—": "-",
        "…": "...",
    })

    def unidecode(x):
        """Approximate ``unidecode.unidecode`` using only the standard library.

        Args:
            x: Value to transliterate; it is converted with ``str()`` first.

        Returns:
            An ASCII-only string: common ligatures and typographic punctuation
            are transliterated, accents are stripped via NFKD decomposition,
            and any remaining non-ASCII character is dropped.
        """
        text = str(x).translate(_ASCII_FALLBACKS)
        decomposed = unicodedata.normalize("NFKD", text)
        # "ignore" drops the combining marks left behind by the decomposition
        # as well as any character that still has no ASCII form.
        return decomposed.encode("ascii", "ignore").decode("ascii")
|
28 |
+
|
29 |
# ---------------- Thésaurus ASSURANCE ----------------
|
30 |
THEMES = {
|
31 |
"Remboursements santé":[r"\bremboursement[s]?\b", r"\bt[eé]l[eé]transmission\b", r"\bno[eé]mie\b",
|
|
|
71 |
DIMINISHERS = [r"\bun[e]?\s+peu\b", r"\bassez\b", r"\bplut[oô]t\b", r"\bl[eé]g[eè]rement\b"]
|
72 |
INTENSIFIER_W, DIMINISHER_W = 1.5, 0.7
|
73 |
|
74 |
+
# --------------- OpenAI (optionnel, robuste) ---------------
|
|
|
75 |
OPENAI_AVAILABLE = False
|
76 |
try:
|
77 |
from openai import OpenAI
|
|
|
81 |
except Exception:
|
82 |
OPENAI_AVAILABLE = False
|
83 |
|
|
|
84 |
# ---------------- Utils ----------------
|
85 |
def normalize(t:str)->str:
|
86 |
if not isinstance(t,str): return ""
|
|
|
186 |
if isinstance(j, dict): return ' '.join(str(v) for v in j.values())
|
187 |
return None
|
188 |
|
189 |
+
# --------- HuggingFace sentiment (optionnel)
|
190 |
+
def make_hf_pipe():
    """Return the Hugging Face sentiment pipeline, or None when unavailable.

    The pipeline is built for the "text-classification" task with the
    ``cmarkea/distilcamembert-base-sentiment`` model and tokenizer.

    A successfully built pipeline is cached on the function object, so
    repeated calls (``analyze_text`` calls this on every run) reuse the same
    instance instead of reloading the model. Failures are not cached, so a
    later call can still succeed.

    Returns:
        The pipeline object, or None if ``transformers`` cannot be imported
        or the pipeline cannot be created.
    """
    cached = getattr(make_hf_pipe, "_cached_pipe", None)
    if cached is not None:
        return cached

    model_id = "cmarkea/distilcamembert-base-sentiment"
    try:
        # Imported lazily so the app still starts without transformers installed.
        from transformers import pipeline
        pipe = pipeline("text-classification", model=model_id, tokenizer=model_id)
    except Exception:
        # Best-effort by design: callers treat None as "HF not available"
        # and fall back to another sentiment method.
        return None

    make_hf_pipe._cached_pipe = pipe
    return pipe
|
198 |
+
|
199 |
+
# --------- Inférence de note NPS à partir du sentiment ----------
|
200 |
+
def infer_nps_from_sentiment(label: str, score: float) -> int:
    """Derive a 0..10 NPS rating from a sentiment label and score.

    Args:
        label: 'positive' | 'neutre' | 'negatif'; any other value is handled
            like 'neutre'.
        score: Sentiment score, expected in [-4..+4].

    Returns:
        An integer rating aligned with NPS buckets (detractor <= 6,
        passive 7-8, promoter >= 9): at least 9 for 'positive', at most 6
        for 'negatif', otherwise 8 when the score is >= 0 and 7 when it is
        negative.
    """
    # Linear rescale [-4..+4] -> [0..10]: -4 -> 0, 0 -> 5, +4 -> 10.
    rescaled = int(round((float(score) + 4.0) * 1.25))
    clamped = min(10, max(0, rescaled))

    if label == "negatif":
        # Cap at 6 so the rating always lands in the detractor bucket.
        return clamped if clamped < 6 else 6
    if label == "positive":
        # Floor at 9 so the rating always lands in the promoter bucket.
        return clamped if clamped > 9 else 9

    # Neutral (or unrecognised) label: passive bucket, split on the score sign.
    if score >= 0:
        return 8
    return 7
|
214 |
+
|
215 |
# --------- Graphiques ----------
|
216 |
def fig_nps_gauge(nps: Optional[float]) -> go.Figure:
|
217 |
v = 0.0 if nps is None else float(nps)
|
|
|
237 |
fig = px.bar(d2, x="theme", y="count", color="type", barmode="stack", title=f"Top {k} thèmes — balance Pos/Neg")
|
238 |
fig.update_layout(xaxis_tickangle=-30); return fig
|
239 |
|
240 |
+
# --------- Analyse principale (paste-only) ----------
|
241 |
def analyze_text(pasted_txt, has_sc, sep_chr,
|
242 |
do_anonymize, use_oa_sent, use_oa_themes, use_oa_summary,
|
243 |
oa_model, oa_temp, top_k):
|
|
|
249 |
if do_anonymize:
|
250 |
df["comment"]=df["comment"].apply(anonymize)
|
251 |
|
252 |
+
# Si OpenAI indisponible, on bascule silencieusement
|
253 |
if (use_oa_sent or use_oa_themes or use_oa_summary) and not OPENAI_AVAILABLE:
|
254 |
use_oa_sent = use_oa_themes = use_oa_summary = False
|
255 |
|
256 |
+
hf_pipe = make_hf_pipe()
|
257 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
def hf_sent(text:str):
|
259 |
+
if hf_pipe is None or not text.strip(): return None
|
260 |
try:
|
261 |
res=hf_pipe(text); lab=str(res[0]["label"]).lower(); p=float(res[0].get("score",0.5))
|
262 |
if "1" in lab or "2" in lab: return {"label":"negatif","score":-4*p}
|
|
|
268 |
rows=[]
|
269 |
theme_agg=collections.defaultdict(lambda:{"mentions":0,"pos":0,"neg":0})
|
270 |
used_hf=False; used_oa=False
|
271 |
+
any_inferred=False
|
272 |
|
273 |
+
for idx, r in df.iterrows():
|
274 |
+
cid=r.get("id", idx+1); comment=normalize(str(r["comment"]))
|
275 |
|
276 |
# Sentiment: OpenAI -> HF -> règles
|
277 |
sent=None
|
278 |
if use_oa_sent:
|
279 |
+
sent=oa_sentiment(comment, oa_model, float(oa_temp or 0.0)); used_oa = used_oa or bool(sent)
|
280 |
if not sent:
|
281 |
hf=hf_sent(comment)
|
282 |
if hf: sent=hf; used_hf=True
|
|
|
287 |
# Thèmes: regex (+ fusion OpenAI)
|
288 |
themes, counts = detect_themes_regex(comment)
|
289 |
if use_oa_themes:
|
290 |
+
tjson=oa_themes(comment, oa_model, float(oa_temp or 0.0))
|
291 |
if isinstance(tjson, dict):
|
292 |
used_oa=True
|
293 |
for th, c in (tjson.get("counts",{}) or {}).items():
|
|
|
295 |
counts[th] = max(counts.get(th, 0), int(c))
|
296 |
themes = [th for th, c in counts.items() if c > 0]
|
297 |
|
298 |
+
# Note NPS : donnée ou inférée
|
299 |
+
given = r.get("nps_score", None)
|
300 |
+
try:
|
301 |
+
given = int(given) if given is not None and str(given).strip() != "" else None
|
302 |
+
except Exception:
|
303 |
+
given = None
|
304 |
+
|
305 |
+
if given is None:
|
306 |
+
inferred = infer_nps_from_sentiment(sent["label"], float(sent["score"]))
|
307 |
+
nps_final = inferred
|
308 |
+
nps_source = "inferred"
|
309 |
+
any_inferred = True
|
310 |
+
else:
|
311 |
+
nps_final = given
|
312 |
+
nps_source = "given"
|
313 |
+
|
314 |
+
bucket = nps_bucket(nps_final)
|
315 |
|
316 |
for th, c in counts.items():
|
317 |
theme_agg[th]["mentions"] += c
|
318 |
+
if sent["label"] == "positive": theme_agg[th]["pos"] += 1
|
319 |
+
elif sent["label"] == "negatif": theme_agg[th]["neg"] += 1
|
|
|
|
|
320 |
|
321 |
rows.append({
|
322 |
+
"id": cid,
|
323 |
"comment": comment,
|
324 |
+
"nps_score_given": given,
|
325 |
+
"nps_score_inferred": nps_final if given is None else None,
|
326 |
+
"nps_score_final": nps_final,
|
327 |
+
"nps_source": nps_source,
|
328 |
+
"nps_bucket": bucket,
|
329 |
"sentiment_score": round(float(sent["score"]), 3),
|
330 |
"sentiment_label": sent["label"],
|
331 |
"sentiment_source": "openai" if (use_oa_sent and used_oa) else ("huggingface" if used_hf else "rules"),
|
|
|
334 |
})
|
335 |
|
336 |
out_df=pd.DataFrame(rows)
|
337 |
+
nps=compute_nps(out_df["nps_score_final"])
|
338 |
dist=out_df["sentiment_label"].value_counts().to_dict()
|
339 |
|
340 |
# Stats par thème
|
|
|
345 |
"net_sentiment":int(d["pos"]-d["neg"])})
|
346 |
themes_df=pd.DataFrame(trs).sort_values(["total_mentions","net_sentiment"],ascending=[False,False])
|
347 |
|
348 |
+
# Synthèse
|
349 |
method = "OpenAI + HF + règles" if (use_oa_sent and used_hf) else ("OpenAI + règles" if use_oa_sent else ("HF + règles" if used_hf else "Règles"))
|
350 |
+
nps_label = "NPS global (inféré)" if any_inferred else "NPS global"
|
351 |
lines=[ "# Synthèse NPS & ressentis clients",
|
352 |
f"- **Méthode** : {method}",
|
353 |
+
f"- **{nps_label}** : {nps:.1f}" if nps is not None else f"- **{nps_label}** : n/a" ]
|
354 |
if dist:
|
355 |
tot=sum(dist.values()); pos=dist.get("positive",0); neg=dist.get("negatif",0); neu=dist.get("neutre",0)
|
356 |
lines.append(f"- **Répartition émotions** : positive {pos}/{tot}, neutre {neu}/{tot}, négative {neg}/{tot}")
|
|
|
361 |
summary_md="\n".join(lines)
|
362 |
|
363 |
if use_oa_summary:
|
364 |
+
md = oa_summary(nps, dist, themes_df, oa_model, float(oa_temp or 0.0))
|
365 |
if md: summary_md = md + "\n\n---\n" + summary_md
|
366 |
|
367 |
+
# Exports
|
368 |
tmpdir=tempfile.mkdtemp(prefix="nps_gradio_")
|
369 |
enriched=os.path.join(tmpdir,"enriched_comments.csv")
|
370 |
themes=os.path.join(tmpdir,"themes_stats.csv")
|
|
|
378 |
z.write(themes,arcname="themes_stats.csv")
|
379 |
z.write(summ,arcname="summary.md")
|
380 |
|
381 |
+
# Panneaux & Graphes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
def make_panels(dfT: pd.DataFrame):
|
383 |
if dfT is None or dfT.empty: return "—","—","—"
|
384 |
pos_top = dfT.sort_values(["verbatims_pos","total_mentions"], ascending=[False,False]).head(4)
|
|
|
403 |
return ench_md, irr_md, "\n".join(rec_lines)
|
404 |
|
405 |
ench_md, irr_md, reco_md = make_panels(themes_df)
|
406 |
+
fig_gauge = fig_nps_gauge(nps)
|
407 |
+
fig_emots = fig_sentiment_bar(dist)
|
408 |
+
k = max(1, int(top_k or 10))
|
409 |
+
fig_top = fig_top_themes(themes_df, k)
|
410 |
+
fig_bal = fig_theme_balance(themes_df, k)
|
411 |
|
412 |
return (summary_md, themes_df.head(100), out_df.head(200), [enriched, themes, summ, zip_path],
|
413 |
ench_md, irr_md, reco_md, fig_gauge, fig_emots, fig_top, fig_bal)
|
414 |
|
415 |
# ---------------- UI ----------------
|
416 |
with gr.Blocks(title="NPS — Analyse (Assurance)") as demo:
|
417 |
+
gr.Markdown("## 🔎 NPS — Analyse sémantique (Assurance)\nCollez vos verbatims (1 par ligne). Option : note NPS après `|`.")
|
418 |
|
419 |
with gr.Column():
|
420 |
pasted = gr.Textbox(label="Verbatims (un par ligne)", lines=10,
|