Spaces:

edouardfoussier
/

rag-rh-assistant

Sleeping

App Files Files Community

edouardfoussier committed 18 days ago

Commit

85504aa

1 Parent(s): 525a9ab

big update of custom chatbot + sidebar layout + sources generation

Browse files

Files changed (6) hide show

app.py +131 -122
assets/chatbot.png +3 -0
helpers.py +109 -7
rag/retrieval.py +40 -26
rag/synth.py +40 -139
rag/utils.py +11 -0

app.py CHANGED Viewed

@@ -1,147 +1,156 @@
-import os, time
 from dotenv import load_dotenv
-# Load environment variables BEFORE importing rag modules
 load_dotenv(override=True)
-import gradio as gr
-from rag.retrieval import search, embed
-from rag.synth import synth_answer_stream, render_sources
-from helpers import linkify_text_with_sources
-missing = []
-if not os.getenv("HF_API_TOKEN"): missing.append("HF_API_TOKEN (embeddings)")
-if not os.getenv("LLM_MODEL"):    print("[INFO] LLM_MODEL not set, using default", flush=True)
-print("[ENV] Missing:", ", ".join(missing) or "None", flush=True)
-# HF_API_TOKEN = os.getenv("HF_API_TOKEN")
-# def sanity():
-#     ok = bool(os.getenv("HF_API_TOKEN"))
-#     v = embed("hello world")
-#     return f"Token set? {ok}\nEmbedding dim: {len(v)}"
-# def rag_chat(user_question, openai_key):
-#     if not openai_key:
-#         return "❌ Please provide your OpenAI API key."
-#     # Inject the key into environment so synth can use it
-#     os.environ["OPENAI_API_KEY"] = openai_key
-#     # Step 1: Retrieve top passages
-#     hits = search(user_question, top_k=8)
-#     if not hits:
-#         return "❌ Sorry, no relevant information found."
-    # # Step 2: Generate synthesized answer
-    # try:
-    #     final_answer = synth_answer(user_question, hits[:5])
-    #     final_answer = linkify(final_answer, hits[:5])
-    #     final_answer += "\n\n---\n" + render_sources(hits[:5])
-    # except Exception as e:
-    #     final_answer = f"❌ Error during synthesis: {e}"
-    # return final_answer
-# def rag_chat(user_question, openai_key):
-#     if not openai_key:
-#         yield "❌ Please provide your OpenAI API key."
-#         return
-#     os.environ["OPENAI_API_KEY"] = openai_key
-#     hits = search(user_question, top_k=8)
-#     if not hits:
-#         yield "❌ Sorry, no relevant information found."
-#         return
-#     acc = ""
-#     try:
-#         for piece in synth_answer_stream(user_question, hits[:5]):
-#             acc += piece or ""
-#             # stream raw text while typing (no links yet to avoid jumpiness)
-#             yield acc
-#     except Exception as e:
-#         partial = acc if acc.strip() else ""
-#         yield (partial + ("\n\n" if partial else "") + f"❌ Streaming error: {e}")
-#         return
-#     final_md = linkify_text_with_sources(acc, hits[:5])
-#     yield final_md
-# with gr.Blocks() as demo:
-#     gr.Markdown("## 🤖 HR Assistant (RAG)\nAsk your question below:")
-#     with gr.Row():
-#         api_key = gr.Textbox(label="🔑 Your OpenAI API Key", type="password")
-#     question = gr.Textbox(label="❓ Your Question", placeholder="e.g., Quels sont les droits à congés ?")
-#     answer = gr.Markdown(label="💡 Assistant Answer")
-#     submit_btn = gr.Button("Ask")
-#     submit_btn.click(fn=rag_chat, inputs=[question, api_key], outputs=answer)
-# if __name__ == "__main__":
-#     demo.launch()
-def rag_chat(user_question: str, openai_key: str):
-    """Generator: streams draft text to a Textbox, then yields final Markdown."""
-    if not openai_key:
-        yield "❌ Please provide your OpenAI API key.", None
         return
-    os.environ["OPENAI_API_KEY"] = openai_key.strip()
-    # Step 1: retrieve
-    yield "⏳ Recherche des passages pertinents…", None
-    hits = search(user_question, top_k=8)
-    if not hits:
-        yield "❌ Sorry, no relevant information found.", None
-        return
-    # Step 2: stream LLM synthesis
     acc = ""
     try:
-        for piece in synth_answer_stream(user_question, hits[:5]):
-            acc += piece or ""
-            # Stream into the draft textbox; keep markdown empty during typing
-            yield acc, None
     except Exception as e:
-        yield f"❌ Error during synthesis: {e}", None
         return
-    # Step 3: finalize + linkify citations in Markdown block
-    md = linkify_text_with_sources(acc, hits[:5])
-    yield acc, md
-with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 HR Assistant (RAG)\nAsk your question below:")
     with gr.Row():
-        api_key = gr.Textbox(label="🔑 Your OpenAI API Key", type="password", placeholder="sk-…")
-    question = gr.Textbox(label="❓ Your Question", placeholder="e.g., Quels sont les droits à congés ?")
-    # live streaming target
-    draft_answer = gr.Markdown(label="💬 Réponse")
-    # final pretty markdown with clickable links
-    # final_answer = gr.Markdown()
-    with gr.Row():
-        submit_btn = gr.Button("Ask", variant="primary")
-        clear_btn = gr.Button("Clear")
-    submit_btn.click(
-        fn=rag_chat,
-        inputs=[question, api_key],
-        outputs=[draft_answer, final_answer],
-        show_progress="full",  # shows loader on the button
-    )
-    clear_btn.click(lambda: ("", ""), outputs=[draft_answer, final_answer])
 if __name__ == "__main__":
     demo.queue().launch()

+import os
+import gradio as gr
+from gradio import update as gr_update  # tiny alias
+from copy import deepcopy
 from dotenv import load_dotenv
 load_dotenv(override=True)
+from rag.retrieval import search, ensure_ready
+from rag.synth import synth_answer_stream
+from helpers import _extract_cited_indices, linkify_text_with_sources, _group_sources_md
+# ---------- Warm-Up ----------
def _warmup():
    """Initialise the RAG backend and return a status line for the UI.

    Calls ``ensure_ready()`` and reports the outcome as a short Markdown
    string. Never raises: any failure is turned into a warning message so
    the page still loads.
    """
    try:
        ensure_ready()
    except Exception as exc:
        return f"⚠️ Warmup a échoué : {exc}"
    return "✅ Modèles initialisés !"
+# ---------- Chat step 1: add user message ----------
def add_user(user_msg: str, history: list[tuple]) -> tuple[str, list[tuple]]:
    """Queue the user's message as a new chat turn and clear the input box.

    Args:
        user_msg: Raw text from the input box; ``None`` or blank text is ignored.
        history: Chat history as ``(user, assistant)`` pairs.

    Returns:
        ``("", history)``. The empty string resets the textbox. When a
        message was given, ``history`` is a new list ending with
        ``(message, "")``; the empty assistant slot is filled in later by
        the streaming step. The list passed in is never modified.
    """
    cleaned = (user_msg or "").strip()
    if cleaned:
        # Concatenate rather than append so the caller's list stays untouched.
        history = history + [(cleaned, "")]
    return "", history
+# ---------- Chat step 2: stream assistant answer ----------
def bot(history: list[tuple], api_key: str, top_k: int):
    """Stream the assistant's reply to the last user turn of ``history``.

    Generator used as a Gradio event handler. Each item it yields is a
    ``(history, sources_markdown)`` pair:

    * while the answer is being generated, a copy of the history whose last
      turn holds the text received so far, with a bare "Sources" heading;
    * at the end, the history with citations turned into links, together
      with the list of sources that were actually cited.

    Args:
        history: Chat turns as ``(user, assistant)`` pairs; the last pair is
            the question to answer. Its last element is updated in place.
        api_key: Optional OpenAI key typed by the user.
        top_k: Number of passages to retrieve (values below 1 are raised to 1).
    """
    no_sources = "### Sources\n_(none)_"
    pending_sources = "### 📚 Sources"

    if not history:
        yield history, no_sources
        return

    if api_key:
        # synth_answer_stream() reads the key from the environment.
        # NOTE(review): os.environ is process-wide, so this key is visible to
        # every request served by this process -- confirm this is acceptable
        # for a shared deployment.
        os.environ["OPENAI_API_KEY"] = api_key.strip()

    user_msg, _ = history[-1]

    # Retrieval
    k = int(max(top_k, 1))
    try:
        hits = search(user_msg, top_k=k)[:k]
    except Exception as e:
        history[-1] = (user_msg, f"❌ Retrieval error: {e}")
        yield history, no_sources
        return

    # Show a small "thinking" placeholder immediately.
    history[-1] = (user_msg, "⏳ Synthèse en cours…")
    yield history, pending_sources

    # Streaming LLM
    acc = ""
    try:
        for chunk in synth_answer_stream(user_msg, hits):
            acc += chunk or ""
            # A fresh list per step; only the last turn differs, so a shallow
            # copy is enough and avoids re-copying the whole chat per chunk.
            yield history[:-1] + [(user_msg, acc)], pending_sources
    except Exception as e:
        history[-1] = (user_msg, f"❌ Synthèse: {e}")
        # No reliable citations on failure: list every retrieved passage.
        yield history, _group_sources_md(hits, [])
        return

    # Finalize: turn [n] citations into links.
    history[-1] = (user_msg, linkify_text_with_sources(acc, hits))

    # Build the sources panel from the citations the model really used.
    # They are read from the raw answer (link titles could add spurious
    # "[n]" markers) and bounded by the number of passages actually
    # retrieved, which may be smaller than k.
    used = _extract_cited_indices(acc, len(hits))
    yield history, _group_sources_md(hits, used)
+# ---------- UI ----------
with gr.Blocks(theme="soft", fill_height=True) as demo:
    gr.Markdown("# 🇫🇷 Assistant RH — Chat RAG")

    # Warm-up status line; demo.load() at the end of this block replaces it
    # with the message returned by _warmup().
    status = gr.Markdown("⏳ Initialisation des modèles du RAG…")

    # Sidebar holding the user-tunable settings.
    with gr.Sidebar(open=True):
        gr.Markdown("## ⚙️ Paramètres")
        api_key = gr.Textbox(
            label="🔑 OpenAI API Key (BYOK — never stored)",
            type="password",
            placeholder="sk-… (optional if set in env)",
        )
        topk = gr.Slider(1, 10, value=5, step=1, label="Top-K passages")
        # Placeholder control: not wired to any event yet.
        save_history = gr.Checkbox(label="Ajouter un modèle reranker")

    with gr.Row():
        # Main column: conversation plus the input row.
        with gr.Column(scale=4):
            chat = gr.Chatbot(
                label="Chat Interface",
                height="65vh",
                show_copy_button=False,
                avatar_images=(
                    "https://raw.githubusercontent.com/gradio-app/gradio/main/gradio/icons/huggingface-logo.svg",
                    "assets/chatbot.png",
                ),
                render_markdown=True,
                show_label=False,
                placeholder="<p style='text-align: center;'>Bonjour 👋,</p><p style='text-align: center;'>Je suis votre assistant HR. Je me tiens prêt à répondre à vos questions.</p>",
            )
            with gr.Row(equal_height=True):
                msg = gr.Textbox(
                    placeholder="Posez votre question…",
                    show_label=False,
                    scale=5,
                )
                send = gr.Button("Envoyer", variant="primary", scale=1)
        # Side column: sources used for the latest answer.
        with gr.Column(scale=1):
            sources = gr.Markdown("### 📚 Sources\n_Ici, vous pourrez consulter les sources utilisées pour formuler la réponse._")

    state = gr.State([])  # chat history: list[tuple(user, assistant)]

    def _wire(trigger):
        """Attach the add_user -> bot -> save-history chain to one trigger."""
        started = trigger(add_user, [msg, state], [msg, state])
        started.then(
            bot,
            [state, api_key, topk],
            [chat, sources],
            show_progress="full",
        ).then(lambda h: h, chat, state)  # copy what the chat shows back into state
        return started

    # The button click and the Enter key run exactly the same pipeline.
    send_click = _wire(send.click)
    msg_submit = _wire(msg.submit)

    demo.load(_warmup, inputs=None, outputs=status)
 if __name__ == "__main__":
     demo.queue().launch()

assets/chatbot.png ADDED Viewed

Git LFS Details

SHA256: 9daa93e27f8a3e5ea504737bebc879f7cd37a1895acfdc5ac5b092c9a7650e3e
Pointer size: 132 Bytes
Size of remote file: 1.15 MB

helpers.py CHANGED Viewed

@@ -1,11 +1,39 @@
 import re
 def linkify_text_with_sources(text: str, passages: list[dict]) -> str:
     """
-    Convert [1], [2]… in `text` to markdown links using the corresponding
-    passage payloads (expects top-5 `hits` from your retriever).
     """
-    # Build mapping: 1-based index -> (title, url)
     mapping = {}
     for i, h in enumerate(passages, start=1):
         p = h.get("payload", h) or {}
@@ -17,9 +45,83 @@ def linkify_text_with_sources(text: str, passages: list[dict]) -> str:
         idx = int(m.group(1))
         title, url = mapping.get(idx, (None, None))
         if url:
-            # turn [n] into [n](url "title")
-            return f"[{idx}]({url} \"{title}\")"
-        # leave as plain [n] if no URL
         return m.group(0)
-    return re.sub(r"\[(\d+)\]", _sub, text)

 import re
+from collections import OrderedDict
+CITATION_RE = re.compile(r"\[(\d+)\]")
def is_unknown_answer(txt: str) -> bool:
    """Return True when ``txt`` contains one of the canned "no answer" replies.

    The comparison is case-insensitive and treats the typographic apostrophe
    (U+2019) like the plain ``'``. ``None`` and empty text give ``False``.
    """
    normalized = (txt or "").lower().replace("\u2019", "'")
    # Patterns are written in lower case because the text is lower-cased
    # before matching.
    patterns = (
        "je suis navré, je n'ai pas trouvé la réponse",
        "je ne sais pas",
        "je ne comprends pas la question",
        "pourriez-vous reformuler",
        "je n'ai pas trouvé d'information pertinente",
    )
    return any(p in normalized for p in patterns)
def _extract_cited_indices(text: str, k: int) -> list[int]:
    """Return the citation numbers found in ``text`` that lie within 1..k.

    Citations are the markers matched by ``CITATION_RE``. Each number is
    reported once, in order of first appearance. ``None`` is treated as
    empty text.
    """
    cited = OrderedDict()
    for match in CITATION_RE.finditer(text or ""):
        try:
            number = int(match.group(1))
            in_range = 1 <= number <= k
        except Exception:
            # Best effort: a marker that cannot be interpreted is ignored.
            continue
        if in_range:
            cited.setdefault(number, True)
    return list(cited)
 def linkify_text_with_sources(text: str, passages: list[dict]) -> str:
     """
+    Convertit [1], [2]… en vrais liens Markdown vers les sources.
     """
+    import re
     mapping = {}
     for i, h in enumerate(passages, start=1):
         p = h.get("payload", h) or {}
         idx = int(m.group(1))
         title, url = mapping.get(idx, (None, None))
         if url:
+            # simple lien markdown [1](url)
+            return f"[_[{idx}]_]({url} \"{title}\")"
         return m.group(0)
+    return re.sub(r"\[(\d+)\]", _sub, text)
+def _group_sources_md(passages: list[dict], used_idxs: list[int]) -> str:
+    """
+    Construit le markdown groupé :
+    ### 📚 Sources (N)
+    1. [Titre](url) _(extrait #1, 3)_
+    2. [Autre](url2) _(extrait #2)_
+    """
+    if not passages:
+        return "### 📚 Sources (0)\n_(aucune)_"
+    # Utiliser les indices cités si dispo, sinon tomber sur 1..len(passages)
+    if not used_idxs:
+        used_idxs = list(range(1, len(passages) + 1))
+    # Groupe par (url ou titre normalisé)
+    groups = []  # [(key, title, url, [idxs])]
+    key_to_pos = {}
+    for idx in used_idxs:
+        p = passages[idx-1]
+        pl = p.get("payload", p) or {}
+        title = (pl.get("title") or pl.get("url") or f"Source {idx}").strip()
+        url = pl.get("url")
+        key = (url or "").strip().lower() or title.lower()
+        if key not in key_to_pos:
+            key_to_pos[key] = len(groups)
+            groups.append([key, title, url, []])
+        groups[key_to_pos[key]][3].append(idx)
+    # Ordonner chaque liste d'indices et construire le markdown
+    lines = [f"### 📚 Sources ({len(groups)})"] if len(groups) > 1 else [f"### 📚 Source"]
+    for i, (_, title, url, idxs) in enumerate(groups, start=1):
+        idxs = sorted(idxs)
+        idx_txt = ", ".join(map(str, idxs))
+        label = "extrait" if len(idxs) == 1 else "extraits"
+        suffix = f" _({label} # {idx_txt})_"
+        if url:
+            lines.append(f"{i}. [{title}]({url}){suffix}")
+        else:
+            lines.append(f"{i}. {title}{suffix}")
+    return "\n".join(lines)
+# def sources_markdown(passages: list[dict]) -> str:
+#     if not passages:
+#         return "### Sources\n_(aucune)_"
+#     lines = [f"### 📚 Sources ({len(passages)})"]
+#     for i, h in enumerate(passages, start=1):
+#         p = h.get("payload", h) or {}
+#         title = (p.get("title") or p.get("url") or f"Source {i}").strip()
+#         url = p.get("url")
+#         score = h.get("score")
+#         # snippet = (p.get("text") or "").strip().replace("\n", " ")
+#         # # on coupe le snippet pour pas que ce soit trop long
+#         # if len(snippet) > 180:
+#         #     snippet = snippet[:180] + "…"
+#         # ligne principale
+#         if url:
+#             line = f"{i}. [{title}]({url})"
+#         else:
+#             line = f"{i}. {title}"
+#         # on ajoute le score et snippet en italique, plus discrets
+#         if isinstance(score, (int, float)):
+#             line += f" _(score {score:.3f})_"
+#         # if snippet:
+#         #     line += f"\n   > {snippet}"
+#         lines.append(line)
+#     return "\n".join(lines)

rag/retrieval.py CHANGED Viewed

@@ -1,6 +1,6 @@
-import os, threading, ast
 from typing import List, Dict, Any, Optional, Tuple
 import numpy as np
 from datasets import load_dataset
 from huggingface_hub import InferenceClient
@@ -13,7 +13,6 @@ DATASETS = [
 HF_EMBED_MODEL = os.getenv("HF_EMBEDDINGS_MODEL", "BAAI/bge-m3")
 HF_API_TOKEN  = os.getenv("HF_API_TOKEN")
-# Try FAISS; fallback to NumPy if import fails
 _USE_FAISS = True
 try:
     import faiss  # type: ignore
@@ -21,8 +20,8 @@ except Exception:
     _USE_FAISS = False
 _embed_client: Optional[InferenceClient] = None
-_index = None           # faiss index or np.ndarray
-_payloads = None        # list[dict]
 _lock = threading.Lock()
 def _client() -> InferenceClient:
@@ -61,15 +60,28 @@ def _load_corpus() -> Tuple[np.ndarray, List[Dict[str, Any]]]:
     X = np.stack(vecs, axis=0)
     return X, payloads
 def _build_index():
     X, payloads = _load_corpus()
-    if _USE_FAISS:
-        dim = X.shape[1]
-        idx = faiss.IndexFlatIP(dim)
-        idx.add(X)
-        return idx, payloads
-    else:
-        return X, payloads  # NumPy fallback
 def _ensure():
     global _index, _payloads
@@ -80,24 +92,11 @@ def _ensure():
 def _search_numpy(X: np.ndarray, q: np.ndarray, k: int):
     scores = X @ q
-    k = min(k, len(scores))
     part = np.argpartition(-scores, k-1)[:k]
     order = part[np.argsort(-scores[part])]
     return scores[order], order
-def rerank_cosine(query_vec, hits, top_k=5):
-    # Re-embed candidate texts and compare? (expensive)
-    # or use retrieval scores only (already cosine). If using NumPy fallback,
-    # you can keep as is. For a tiny boost, score by length-normalized match:
-    scored = []
-    for h in hits:
-        txt = (h["payload"].get("text") or "")
-        # penalize super-long chunks a bit
-        penalty = 1.0 / (1.0 + len(txt)/1500.0)
-        scored.append((h["score"] * penalty, h))
-    scored.sort(key=lambda x: x[0], reverse=True)
-    return [h for _, h in scored[:top_k]]
 def search(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
     _ensure()
     q = embed(query)
@@ -113,3 +112,18 @@ def search(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
         hits.append({"score": float(s), "payload": p})
     return hits

+# retrieval.py
+import os, ast, threading
 from typing import List, Dict, Any, Optional, Tuple
 import numpy as np
 from datasets import load_dataset
 from huggingface_hub import InferenceClient
 HF_EMBED_MODEL = os.getenv("HF_EMBEDDINGS_MODEL", "BAAI/bge-m3")
 HF_API_TOKEN  = os.getenv("HF_API_TOKEN")
 _USE_FAISS = True
 try:
     import faiss  # type: ignore
     _USE_FAISS = False
 _embed_client: Optional[InferenceClient] = None
+_index = None
+_payloads = None
 _lock = threading.Lock()
 def _client() -> InferenceClient:
     X = np.stack(vecs, axis=0)
     return X, payloads
+CACHE = "/tmp/rag_index.npz"
def _faiss_from_X(X):
    """Return a ``faiss.IndexFlatIP`` holding the rows of ``X``.

    The index dimension is ``X.shape[1]``.
    """
    import faiss  # imported here so the module still loads without faiss

    index = faiss.IndexFlatIP(X.shape[1])
    index.add(X)
    return index
 def _build_index():
+    # try cached index
+    if os.path.exists(CACHE):
+        try:
+            d = np.load(CACHE, allow_pickle=True)
+            X = d["X"]
+            payloads = d["payloads"].tolist()
+            return (_faiss_from_X(X) if _USE_FAISS else X), payloads
+        except Exception:
+            # cache corrupted → rebuild
+            try: os.remove(CACHE)
+            except: pass
+    # build fresh
     X, payloads = _load_corpus()
+    np.savez_compressed(CACHE, X=X, payloads=np.array(payloads, dtype=object))
+    return (_faiss_from_X(X) if _USE_FAISS else X), payloads
 def _ensure():
     global _index, _payloads
 def _search_numpy(X: np.ndarray, q: np.ndarray, k: int):
     scores = X @ q
+    k = max(1, min(k, len(scores)))
     part = np.argpartition(-scores, k-1)[:k]
     order = part[np.argsort(-scores[part])]
     return scores[order], order
 def search(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
     _ensure()
     q = embed(query)
         hits.append({"score": float(s), "payload": p})
     return hits
+# ---------- explicit warm-up helpers ----------
def warm_up_sync():
    """Run one throwaway search to warm up the retrieval stack.

    Best effort: a failure is logged as a warning and never raised, so the
    caller can proceed regardless.
    """
    try:
        search("warmup", top_k=3)
    except Exception:
        # Keep the best-effort contract but leave a trace of the failure.
        import logging

        logging.getLogger(__name__).warning("RAG warm-up failed", exc_info=True)
def warm_up_async():
    """Start ``warm_up_sync`` on a daemon thread and return immediately."""
    threading.Thread(target=warm_up_sync, daemon=True).start()
def ensure_ready():
    """Prepare retrieval for use: build the index, then warm the embedder.

    Any exception from either step propagates to the caller.
    """
    # Builds the FAISS/NumPy index and loads the payloads.
    _ensure()
    # One embedding call up front so the first real query avoids the
    # embedding endpoint's cold start; the result is discarded.
    embed("warmup")

rag/synth.py CHANGED Viewed

@@ -1,157 +1,58 @@
 import os
 from openai import OpenAI
 LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")
 LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api.openai.com/v1")
 def _build_prompt(query, passages):
-    ctx = "\n\n".join([(p["payload"].get("text") or "") for p in passages])
     return (
-        "Tu es un assistant RH de la fonction publique française.\n"
-        "- Réponds de façon factuelle et concise.\n"
-        "- Cite les sources en fin de phrase avec [1], [2]… basées sur l’ordre des passages.\n"
-        "- Si l’info n’est pas dans les sources, réponds « Je ne sais pas ».\n\n"
-        f"Question: {query}\n\nSources (indexées):\n{ctx}\n\nRéponse:"
     )
 def synth_answer_stream(query, passages):
     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=LLM_BASE_URL)
-    prompt = _build_prompt(query, passages)
     stream = client.chat.completions.create(
         model=LLM_MODEL,
         messages=[{"role": "user", "content": prompt}],
         temperature=0.2,
-        stream=True,  # 👈 IMPORTANT
     )
-    # The SDK yields events with deltas
-    for event in stream:
-        delta = getattr(getattr(event, "choices", [None])[0], "delta", None)
-        if delta and delta.content:
-            yield delta.content
-# def linkify(text, passages):
-#     # (optional) keep simple: return text as-is for now
-#     return text
-def render_sources(passages):
-    lines = []
-    for i, p in enumerate(passages, 1):
-        title = (p["payload"].get("title") or "").strip() or "Sans titre"
-        url = p["payload"].get("url") or ""
-        lines.append(f"[{i}] {title}{' – ' + url if url else ''}")
-    return "\n".join(lines)
-# def linkify_text_with_sources(text: str, passages):
-#     """
-#     Replace [1], [2]... with clickable links if the passage has a URL.
-#     Also append a Sources section as a numbered list.
-#     """
-#     # Build a map: 1-based index -> url
-#     urls = []
-#     for p in passages:
-#         url = (p["payload"].get("url") or "").strip()
-#         urls.append(url if url.startswith("http") else "")
-#     # Inline [n] -> [n](url) when available
-#     out = text
-#     for i, url in enumerate(urls, start=1):
-#         if url:
-#             out = out.replace(f"[{i}]", f"[{i}]({url})")
-#     # Add a Sources section
-#     lines = ["\n\n---\n**Sources**"]
-#     for i, p in enumerate(passages, start=1):
-#         title = (p["payload"].get("title") or "").strip() or "Sans titre"
-#         url = (p["payload"].get("url") or "").strip()
-#         if url.startswith("http"):
-#             lines.append(f"{i}. [{title}]({url})")
-#         else:
-#             lines.append(f"{i}. {title}")
-#     return out + "\n" + "\n".join(lines)
-# import os
-# from openai import OpenAI
-# LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")
-# LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api.openai.com/v1")
-# def _first_k_chars(text, k=1200):
-#     t = text.strip()
-#     return t[:k] + ("…" if len(t) > k else "")
-# def _build_prompt(query, passages):
-#     chunks = []
-#     for i, p in enumerate(passages, 1):
-#         txt = p["payload"].get("text") or ""
-#         chunks.append(f"[{i}] {_first_k_chars(txt)}")
-# # def _build_prompt(query, passages):
-# #     chunks = []
-# #     for i, p in enumerate(passages, 1):
-# #         txt = p["payload"].get("text") or ""
-#         # chunks.append(f"[{i}] {txt}")
-#     context = "\n\n".join(chunks)
-#     return f"""Tu es un assistant RH de la fonction publique française.
-# - Réponds de manière factuelle et concise.
-# - Cite tes sources en fin de phrase avec [n] correspondant aux extraits ci-dessous.
-# - Si l’information n’est pas dans les sources, réponds : “Je ne sais pas”.
-# - Ne fabrique pas de liens ni de références.
-# Question: {query}
-# Extraits indexés:
-# {context}
-# Réponse:"""
-# def synth_answer_stream(query, passages):
-#     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=LLM_BASE_URL)
-#     prompt = _build_prompt(query, passages)
-#     # ✅ Correct streaming usage
-#     stream = client.chat.completions.create(
-#         model=LLM_MODEL,
-#         messages=[{"role": "user", "content": prompt}],
-#         temperature=0.2,
-#         stream=True,  # <- this is key
-#     )
-#     for chunk in stream:
-#         delta = getattr(chunk.choices[0].delta, "content", None)
-#         if delta:
-#             acc.append(delta)
-#             yield delta  # stream piece by piece
-# # def synth_answer(query, passages):
-# #     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=LLM_BASE_URL)
-# #     prompt = _build_prompt(query, passages)
-# #     resp = client.chat.completions.create(
-# #         model=LLM_MODEL,
-# #         messages=[{"role": "user", "content": prompt}],
-# #         temperature=0.2,
-# #     )
-# #     return resp.choices[0].message.content.strip()
-# # --- HELPERS
-# def render_sources(passages):
-#     lines = []
-#     for i, p in enumerate(passages, 1):
-#         pl = p["payload"]
-#         title = (pl.get("title") or "Source").strip()
-#         url = pl.get("url") or ""
-#         lines.append(f"[{i}] {title}" + (f" — {url}" if url else ""))
-#     return "\n".join(lines)
-# def linkify(text, passages):
-#     # turn [1] -> markdown link when url exists
-#     for i, p in enumerate(passages, 1):
-#         url = p["payload"].get("url")
-#         if url:
-#             text = text.replace(f"[{i}]", f"[{i}]({url})")
-#     return text

+# rag/synth.py
 import os
 from openai import OpenAI
+from rag.utils import utf8_safe
 LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")
 LLM_BASE_URL = os.getenv("LLM_BASE_URL", "https://api.openai.com/v1")
 def _build_prompt(query, passages):
+    from rag.utils import utf8_safe
+    # Construire des blocs numérotés et balisés
+    blocks = []
+    for i, h in enumerate(passages, start=1):
+        p = h.get("payload", h) or {}
+        title = (p.get("title") or p.get("url") or f"Source {i}").strip()
+        url   = p.get("url") or ""
+        text  = utf8_safe(p.get("text") or "")
+        # Chaque bloc porte explicitement son index [i]
+        blocks.append(
+            f"### Source [{i}] — {title}\n"
+            f"{('URL: ' + url) if url else ''}\n"
+            f"{text}\n"
+        )
+    context = "\n\n".join(blocks)
+    query = utf8_safe(query)
     return (
+        "Tu es un assistant RH chargé de répondre à des questions dans le domaine des ressources humaines en t'appuyant sur les sources fournies.\n"
+        "Consignes :\n"
+        "- Réponds de manière factuelle, concise et polie (vouvoiement).\n"
+        "- Quand tu affirmes un fait, cite tes sources en fin de phrase avec le format [1], [2]… en te basant sur l'index de ces sources (ex: [1] est la source 1, [2] est la source 2, etc.)\n\n"
+        "- Si l'information n'est pas présente dans les sources, réponds : \"Je suis navré, je n'ai pas trouvé la réponse à cette question\".\n\n"
+        "- Si la question est mal formulée, réponds : \"Je ne comprends pas la question. Pourriez-vous reformuler ?\"\n\n"
+        "- Ne fabrique pas de liens ni de références.\n\n"
+        f"Question: {query}\n"
+        f"Sources (indexées) : {context}\n\n"
+        "Réponse:"
     )
 def synth_answer_stream(query, passages):
     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"), base_url=LLM_BASE_URL)
+    prompt = utf8_safe(_build_prompt(query, passages))
     stream = client.chat.completions.create(
         model=LLM_MODEL,
         messages=[{"role": "user", "content": prompt}],
         temperature=0.2,
+        stream=True,
     )
+    for event in stream:
+        if not getattr(event, "choices", None):
+            continue
+        delta = event.choices[0].delta
+        if delta and getattr(delta, "content", None):
+            yield utf8_safe(delta.content or "")

rag/utils.py ADDED Viewed

	@@ -0,0 +1,11 @@

+# rag/utils.py
+import unicodedata
def utf8_safe(s: str) -> str:
    """Return ``s`` as NFC-normalised text that is safe to encode as UTF-8.

    Non-string input is converted with ``str()``. The em dash (U+2014) is
    replaced by a plain hyphen, and any character that cannot be encoded
    as UTF-8 is dropped.
    """
    text = s if isinstance(s, str) else str(s)
    # Compose to NFC, then swap the em dash for a simple '-'.
    text = unicodedata.normalize("NFC", text).replace("\u2014", "-")
    # Round-trip through UTF-8, silently discarding anything that cannot be
    # encoded, so a lower layer that is strict about encoding will not fail.
    return text.encode("utf-8", "ignore").decode("utf-8", "ignore")