Spaces:
Runtime error
Runtime error
update rss structure
Browse files- update_rss.py +25 -3
update_rss.py
CHANGED
|
@@ -2,19 +2,26 @@ import xml.etree.ElementTree as ET
|
|
| 2 |
from datetime import datetime
|
| 3 |
import os
|
| 4 |
from huggingface_hub import InferenceClient
|
|
|
|
| 5 |
|
| 6 |
client = InferenceClient(
|
| 7 |
-
"
|
| 8 |
provider="hf-inference",
|
| 9 |
token=os.getenv("HF_TOKEN"),
|
| 10 |
)
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
|
| 13 |
"""Ask the LLM for a headline and a short description for the podcast episode."""
|
| 14 |
prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
|
| 15 |
1. A catchy, informative headline for a podcast episode about it (max 15 words).
|
| 16 |
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
|
| 17 |
|
|
|
|
|
|
|
| 18 |
Here is the topic:
|
| 19 |
{subject[:10000]}
|
| 20 |
"""
|
|
@@ -28,7 +35,7 @@ Here is the topic:
|
|
| 28 |
)
|
| 29 |
full_text = response.choices[0].message.content.strip()
|
| 30 |
# Try to split headline and description
|
| 31 |
-
lines = [l.strip() for l in full_text.splitlines() if l.strip()]
|
| 32 |
if len(lines) >= 2:
|
| 33 |
headline = lines[0]
|
| 34 |
description = " ".join(lines[1:])
|
|
@@ -37,6 +44,19 @@ Here is the topic:
|
|
| 37 |
description = full_text
|
| 38 |
return headline, description
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# -----------------------------------------------------------------------------
|
| 41 |
# UPDATE RSS
|
| 42 |
# -----------------------------------------------------------------------------
|
|
@@ -75,5 +95,7 @@ def update_rss(subject, audio_url, audio_length, rss_path="rss.xml"):
|
|
| 75 |
else:
|
| 76 |
channel.append(item)
|
| 77 |
|
| 78 |
-
# Write back to file
|
|
|
|
|
|
|
| 79 |
tree.write(rss_path, encoding="utf-8", xml_declaration=True)
|
|
|
|
| 2 |
from datetime import datetime
|
| 3 |
import os
|
| 4 |
from huggingface_hub import InferenceClient
|
| 5 |
+
import re
|
| 6 |
|
| 7 |
# Module-level inference client used for the chat-completion requests in this
# file. The access token is read from the HF_TOKEN environment variable; when
# the variable is unset, None is passed through to the client.
client = InferenceClient(
    model="meta-llama/Llama-3.1-8B-Instruct",
    provider="hf-inference",
    token=os.environ.get("HF_TOKEN"),
)
|
| 12 |
|
| 13 |
+
def clean_label(line):
    """Strip a leading ``Headline``/``Description`` label from *line*.

    The label is only removed when it is actually marked up as a label, i.e.
    the keyword is followed by a colon and/or closing markdown asterisks
    (``Headline: ...``, ``**Headline:** ...``, ``**Description**: ...``).
    Ordinary text that merely starts with one of those words
    (``"Headlines from the lab"``, ``"Description logic explained"``) is
    returned unchanged, so real headlines are never truncated.

    Args:
        line: A single line of model output.

    Returns:
        The line with any recognised leading label (and the whitespace around
        it) removed. Matching is case-insensitive.
    """
    return re.sub(
        # optional opening "*"/"**", the keyword as a whole word, then either
        # ":" (+ optional closing asterisks) or closing asterisks (+ optional ":").
        r"^\s*\*{0,2}(?:Headline|Description)\b(?:\s*:\*{0,2}|\*{1,2}\s*:?)\s*",
        "",
        line,
        flags=re.IGNORECASE,
    )
|
| 16 |
+
|
| 17 |
def generate_headline_and_description(subject: str, steering_question: str | None = None) -> tuple[str, str]:
|
| 18 |
"""Ask the LLM for a headline and a short description for the podcast episode."""
|
| 19 |
prompt = f"""You are a world-class podcast producer. Given the following paper or topic, generate:
|
| 20 |
1. A catchy, informative headline for a podcast episode about it (max 15 words).
|
| 21 |
2. A short, engaging description (2-3 sentences, max 60 words) that summarizes what listeners will learn or why the topic is exciting.
|
| 22 |
|
| 23 |
+
Output ONLY the headline on the first line, and the description on the second line. Do NOT include any labels, markdown, or extra formatting.
|
| 24 |
+
|
| 25 |
Here is the topic:
|
| 26 |
{subject[:10000]}
|
| 27 |
"""
|
|
|
|
| 35 |
)
|
| 36 |
full_text = response.choices[0].message.content.strip()
|
| 37 |
# Try to split headline and description
|
| 38 |
+
lines = [clean_label(l.strip()) for l in full_text.splitlines() if l.strip()]
|
| 39 |
if len(lines) >= 2:
|
| 40 |
headline = lines[0]
|
| 41 |
description = " ".join(lines[1:])
|
|
|
|
| 44 |
description = full_text
|
| 45 |
return headline, description
|
| 46 |
|
| 47 |
+
def indent(elem, level=0, space=" "):
    """Pretty-print an ElementTree subtree in place by rewriting whitespace.

    Whitespace-only (or missing) ``text``/``tail`` values are replaced with a
    newline followed by the indentation for the appropriate depth. Text or
    tails that contain real content are left untouched.

    Args:
        elem: The element whose subtree should be indented.
        level: Nesting depth of *elem*; callers normally leave this at 0.
        space: Indentation unit repeated once per nesting level.

    Returns:
        None. The tree is modified in place.
    """
    pad = "\n" + level * space
    if len(elem):
        # The first child starts on its own line, one level deeper.
        if not elem.text or not elem.text.strip():
            elem.text = pad + space
        for child in elem:
            indent(child, level + 1, space)
        # The last child's tail precedes this element's closing tag, so it has
        # to drop back to this element's depth; otherwise the closing tag is
        # written one level too deep.
        if not child.tail or not child.tail.strip():
            child.tail = pad
        # An element with children always gets a tail, including the root,
        # which makes the serialised document end with a newline.
        if not elem.tail or not elem.tail.strip():
            elem.tail = pad
    elif level and (not elem.tail or not elem.tail.strip()):
        # Leaf elements below the root: the next sibling starts on a new line.
        elem.tail = pad
|
| 59 |
+
|
| 60 |
# -----------------------------------------------------------------------------
|
| 61 |
# UPDATE RSS
|
| 62 |
# -----------------------------------------------------------------------------
|
|
|
|
| 95 |
else:
|
| 96 |
channel.append(item)
|
| 97 |
|
| 98 |
+
# Write back to file with pretty formatting
|
| 99 |
+
indent(root)
|
| 100 |
+
ET.register_namespace('itunes', "http://www.itunes.com/dtds/podcast-1.0.dtd")
|
| 101 |
tree.write(rss_path, encoding="utf-8", xml_declaration=True)
|