marks committed on
Commit
b5f9861
·
1 Parent(s): 697ec60

Upgraded prompt

Browse files
Files changed (4) hide show
  1. prompt_templates.py +29 -0
  2. setup.py +5 -1
  3. text_processor.py +84 -0
  4. tts.py +4 -0
prompt_templates.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Style rules for generated podcast scripts: single-voice narrative prose with
# no markdown, dialog format, speaker labels, stage directions or URLs, followed
# by one bad and one good example.
# NOTE(review): presumably passed to the LLM as the system message; the call
# site is not visible here, so confirm against the caller.
PODCAST_SYSTEM_PROMPT = """You are a professional podcast scriptwriter. Follow these rules strictly:
1. Write in natural, conversational prose only
2. Never use markdown formatting
3. Never write dialog or conversation format
4. Never use speaker labels, colons, or turn-taking
5. Never include stage directions or [bracketed text]
6. Never use asterisks, underscores, or other formatting symbols
7. Write as a continuous narrative
8. Avoid technical jargon unless explicitly explaining it
9. Use complete sentences and proper transitions
10. Never include URLs or raw links

Bad example:
John: This is interesting
[excited] Mary: I agree!

Good example:
This topic is particularly interesting, and there's strong agreement among experts about its significance.
"""
20
+
21
def create_podcast_prompt(topic, duration_minutes=10):
    """Build the user prompt asking for a single-narrator podcast script.

    Args:
        topic: Subject of the episode; interpolated into the prompt as-is.
        duration_minutes: Target length of the script in minutes.

    Returns:
        The formatted prompt string.
    """
    prompt = f"""Using the style guidelines provided, create a {duration_minutes}-minute podcast script about {topic}.
Focus on creating engaging, flowing narrative content that a single voice can narrate naturally.
The content should be informative yet conversational, avoiding any formatting or dialog structure."""
    return prompt
25
+
26
def create_episode_segments(topic, segments=3):
    """Build the prompt asking for an episode split into linked segments.

    Args:
        topic: Subject of the episode; interpolated into the prompt as-is.
        segments: Number of distinct segments to request.

    Returns:
        The formatted prompt string.
    """
    prompt = f"""Create {segments} distinct segments about {topic}.
Each segment should flow naturally into the next, using clear transitional phrases.
Remember to maintain a single narrative voice throughout."""
    return prompt
setup.py CHANGED
@@ -6,6 +6,10 @@ setup(
6
  packages=find_packages(),
7
  install_requires=[
8
  'rich',
9
- # add other dependencies here
 
 
 
 
10
  ]
11
  )
 
6
  packages=find_packages(),
7
  install_requires=[
8
  'rich',
9
+ 'requests',
10
+ 'python-dotenv',
11
+ 'openai', # If using OpenAI
12
+ 'anthropic', # If using Claude
13
+ 'regex', # For more advanced regex operations
14
  ]
15
  )
text_processor.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def remove_dialog_formatting(text: str) -> str:
    """Strip speaker labels and stage directions from dialog-style text.

    Handles labels at the start of a line ("John:", "JOHN:", "[John]:"),
    including labels that only reach the start of the line once a leading
    stage direction has been removed ("[excited] Mary:").

    Args:
        text: Raw text, possibly spanning several lines.

    Returns:
        The text with labels, parenthetical and bracketed asides removed.
        Line breaks are preserved; whitespace inside a line is not collapsed.
    """
    # Bracketed labels must go before the generic bracket removal below,
    # otherwise "[John]: hi" would be reduced to ": hi".
    text = re.sub(r'^[ \t]*\[[^\]\n]*\]:[ \t]*', '', text, flags=re.MULTILINE)

    # Stage directions, in parentheses or square brackets.
    text = re.sub(r'\([^)]*\)', '', text)
    text = re.sub(r'\[[^\]]*\]', '', text)

    # Plain labels are stripped last so that a label preceded by a (now
    # removed) stage direction is caught too. The (?!\d) guard keeps clock
    # times such as "10:30" intact.
    text = re.sub(
        r'^[ \t]*(?:[A-Z0-9]+|[A-Z][a-z]+):(?!\d)[ \t]*',
        '',
        text,
        flags=re.MULTILINE,
    )

    return text
14
+
15
def remove_breakthrough_formatting(text: str) -> str:
    """Remove leftover LLM formatting that survived the prompt instructions.

    Args:
        text: Text to clean, possibly spanning several lines.

    Returns:
        The text with label-like line prefixes, bracketed and parenthetical
        asides, quoted labels, HTML-like tags, ``---`` separators and
        hash-prefixed words removed.
    """
    patterns = [
        # Speaker label: at most three capitalised/numeric tokens followed by
        # a colon ("Host:", "Speaker 1:", "Dr. Smith:"). A catch-all
        # "anything up to the first colon" would also delete ordinary prose
        # ("The key point is this: ...") and clock times, so the label shape
        # is restricted and a colon followed by a digit is left alone.
        (r"^[ \t]*[A-Z][\w.'-]*(?:[ \t]+[A-Z0-9][\w.'-]*){0,2}:(?!\d)[ \t]*", ''),
        (r'\[.*?\]', ''),    # Bracketed asides
        (r'\(.*?\)', ''),    # Parenthetical asides
        (r'"\w+:"', ''),     # Quoted speaker labels
        (r'<.*?>', ''),      # HTML-like tags
        (r'---.*?---', ''),  # Section separators
        # Hash marker(s) plus the word that follows; "#+" so that "## Title"
        # does not leave a stray "#" behind.
        (r'#+\s*\w+', ''),
    ]

    for pattern, replacement in patterns:
        text = re.sub(pattern, replacement, text, flags=re.MULTILINE)
    return text
30
+
31
def _lower_sentence_start(sentence: str) -> str:
    """Lower-case the first letter of a non-empty sentence so it can follow a transition.

    The pronoun "I" (and contractions such as "I'm") and all-caps first words
    such as acronyms are left untouched.
    """
    first_word = sentence.split(None, 1)[0]
    if first_word == "I" or first_word.startswith(("I'", "I\u2019")):
        return sentence
    if len(first_word) > 1 and first_word.isupper():
        return sentence
    return sentence[0].lower() + sentence[1:]


def convert_to_monologue(text: str) -> str:
    """Convert multi-party dialog into a single flowing narrative.

    Each non-empty line has a leading speaker label removed. Every line after
    the first that starts with an upper-case letter is introduced with the
    next transitional phrase, cycling through a fixed list.

    Args:
        text: Newline-separated dialog or prose.

    Returns:
        All non-empty lines joined with single spaces.
    """
    transitions = (
        "Then", "After that", "Next", "Following that",
        "Subsequently", "Moving on", "Additionally",
    )

    narrative = []
    transition_index = 0

    for line in text.split('\n'):
        # Strip surrounding whitespace first so indented lines are still
        # recognised as labels and sentence starts.
        cleaned_line = re.sub(r'^[A-Z0-9\[\]]+:\s*', '', line.strip())
        cleaned_line = re.sub(r'^[A-Z][a-z]+:\s*', '', cleaned_line)

        # A line holding only a speaker label is empty by now; skipping it
        # avoids indexing into an empty string below.
        if not cleaned_line:
            continue

        if narrative and cleaned_line[0].isupper():
            # Only the first letter is lowered; lowering the whole line would
            # destroy proper nouns and acronyms further into the sentence.
            narrative.append(
                f"{transitions[transition_index]}, {_lower_sentence_start(cleaned_line)}"
            )
            transition_index = (transition_index + 1) % len(transitions)
        else:
            narrative.append(cleaned_line)

    return ' '.join(narrative)
57
+
58
def clean_formatting(text: str) -> str:
    """Remove markdown emphasis markers and code from text.

    Fenced code blocks and inline code spans are deleted together with their
    contents; bold, italic, underscore emphasis and strikethrough markers are
    removed while the emphasised text is kept.

    Args:
        text: Text that may contain markdown formatting.

    Returns:
        The text without those markers. Whitespace is not collapsed.
    """
    # Drop code first so that formatting characters inside it cannot pair up
    # with markers in the surrounding prose.
    text = re.sub(r'```[\s\S]*?```', '', text)
    text = re.sub(r'`[^`]*`', '', text)

    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)  # Bold
    text = re.sub(r'\*(.+?)\*', r'\1', text)      # Italic

    # Underscore emphasis only counts when the markers are not glued to word
    # characters, so identifiers like max_retry_count keep their underscores.
    text = re.sub(r'(?<!\w)__(.+?)__(?!\w)', r'\1', text)  # Strong
    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)    # Emphasis

    text = re.sub(r'~~(.+?)~~', r'\1', text)      # Strikethrough

    return text
71
+
72
def process_for_podcast(text: str) -> str:
    """Run the full clean-up pipeline that prepares text for narration.

    Applies, in order, remove_dialog_formatting, clean_formatting,
    remove_breakthrough_formatting and convert_to_monologue, then normalises
    whitespace.

    Args:
        text: Raw script text. ``None`` or an empty string yields ``""``.

    Returns:
        A single-line string with runs of whitespace collapsed to one space
        and no leading or trailing whitespace.
    """
    if not text:
        return ""

    text = remove_dialog_formatting(text)
    text = clean_formatting(text)
    text = remove_breakthrough_formatting(text)
    text = convert_to_monologue(text)

    # \s+ also matches newlines, so one substitution both collapses repeated
    # spaces and flattens line breaks.
    return re.sub(r'\s+', ' ', text).strip()
tts.py CHANGED
@@ -1,7 +1,11 @@
1
  import re
2
  import requests
 
3
 
4
  def clean_text_for_speech(text):
 
 
 
5
  # Replace URLs with readable text
6
  text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
7
  ' link ', text)
 
1
  import re
2
  import requests
3
+ from .text_processor import process_for_podcast
4
 
5
  def clean_text_for_speech(text):
6
+ # First apply podcast-specific processing
7
+ text = process_for_podcast(text)
8
+
9
  # Replace URLs with readable text
10
  text = re.sub(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
11
  ' link ', text)