import re

def clean_asterisks(text):
    """Remove every asterisk from ``text`` and collapse its whitespace.

    All ``*`` characters are deleted, whether they wrap content
    (``**bold**``, ``*italic*``) or stand alone. Every run of whitespace --
    spaces, tabs and newlines alike -- is then reduced to a single space and
    leading/trailing whitespace is dropped, so the result is a single line.

    Args:
        text: The string to clean.

    Returns:
        The cleaned, single-line string.
    """
    # One blanket removal covers paired and stray asterisks alike; a separate
    # "unwrap the pairs first" pass would produce exactly the same result.
    without_asterisks = text.replace('*', '')
    # split() without arguments splits on any whitespace run (newlines
    # included) and discards empty pieces, so joining normalises spacing.
    return ' '.join(without_asterisks.split())

def remove_dialog_formatting(text):
    """Remove common dialog markers and formatting.

    Speaker labels are only recognised at the start of a line. Three shapes
    are handled: a bracketed name (``[John]:``), a run of capitals, digits
    and brackets (``JOHN:``, ``SPEAKER1:``), and a single capitalised word
    (``John:``). Whitespace following the colon is left in place.

    After the labels, every ``(...)`` and ``[...]`` span is deleted together
    with its contents, wherever it occurs in the text.

    Args:
        text: Dialog text, possibly spanning several lines.

    Returns:
        The text with speaker labels and bracketed/parenthesised spans removed.
    """
    # Bracketed labels must be removed as a unit, colon included. The
    # upper-case pattern below cannot match mixed-case names such as
    # "[John]:", and the generic bracket removal further down would strip
    # only "[John]" and leave a stray ":" behind.
    text = re.sub(r'^\[[^\]\n]+\]:', '', text, flags=re.MULTILINE)
    text = re.sub(r'^[A-Z0-9\[\]]+:', '', text, flags=re.MULTILINE)
    text = re.sub(r'^[A-Z][a-z]+:', '', text, flags=re.MULTILINE)

    # Remove parenthetical and bracketed stage directions.
    text = re.sub(r'\([^)]*\)', '', text)
    text = re.sub(r'\[[^\]]*\]', '', text)

    return text

def remove_breakthrough_formatting(text):
    """Delete leftover LLM formatting that survived the prompts.

    A fixed series of regular expressions is applied one after another, each
    match being replaced by the empty string. All of them run with
    ``re.MULTILINE``, so ``^`` anchors at the start of every line.

    Args:
        text: The text to scrub.

    Returns:
        The text with every match of every pattern removed.
    """
    leftovers = (
        # Everything from the start of a line through its first colon, plus
        # any whitespace after it (meant for remaining speaker labels).
        # NOTE(review): this also deletes ordinary text that merely precedes
        # a colon on the line, e.g. "It is 3:00" becomes "00".
        r'^.*?:\s*',
        r'\[.*?\]',    # bracketed spans on a single line
        r'\(.*?\)',    # parenthesised spans on a single line
        r'"\w+:"',     # quoted speaker labels such as "John:"
        r'<.*?>',      # HTML-like tags
        r'---.*?---',  # section separators on a single line
        # '#', optional whitespace and the word characters right after it.
        # NOTE(review): only the first word of a heading is removed.
        r'#\s*\w+',
    )

    scrubbed = text
    for regex in leftovers:
        scrubbed = re.compile(regex, re.MULTILINE).sub('', scrubbed)
    return scrubbed

def _lower_sentence_start(sentence):
    """Lower-case the first letter of ``sentence`` for use after a transition.

    The first word is left untouched when it is the pronoun "I" or when its
    first two characters are upper-case ("I'm", "NASA", "OK,"), because
    lower-casing those would corrupt the word.
    """
    words = sentence.split(None, 1)
    if not words:
        return sentence
    first_word = words[0]
    if first_word == "I" or (len(first_word) > 1 and first_word[:2].isupper()):
        return sentence
    return sentence[0].lower() + sentence[1:]


def convert_to_monologue(text):
    """Convert multi-party dialog into a flowing narrative.

    The text is processed line by line. Blank lines are dropped, a leading
    speaker label (``JOHN:``, ``John:``) is stripped, and lines that consist
    of nothing but a label are skipped. Every line after the first that
    starts with an upper-case letter is prefixed with the next transition
    phrase (cycling through a fixed list) and has its leading letter
    lower-cased; the rest of the line keeps its original casing. All
    resulting lines are joined with single spaces.

    Args:
        text: Dialog text with one utterance per line.

    Returns:
        The narrative as a single string; empty if no line has content.
    """
    transitions = [
        "Then", "After that", "Next", "Following that",
        "Subsequently", "Moving on", "Additionally"
    ]

    narrative = []
    current_transition = 0

    for line in text.split('\n'):
        if not line.strip():
            continue

        # Remove speaker labels if any.
        cleaned_line = re.sub(r'^[A-Z0-9\[\]]+:\s*', '', line)
        cleaned_line = re.sub(r'^[A-Z][a-z]+:\s*', '', cleaned_line)

        # A label-only line ("JOHN:") leaves nothing behind. Skip it so it
        # neither contributes an empty fragment nor makes the index below
        # fail on an empty string.
        if not cleaned_line:
            continue

        # Add a transition if the line looks like the start of a new thought.
        if narrative and cleaned_line[0].isupper():
            phrase = transitions[current_transition]
            narrative.append(f"{phrase}, {_lower_sentence_start(cleaned_line)}")
            current_transition = (current_transition + 1) % len(transitions)
        else:
            narrative.append(cleaned_line)

    return ' '.join(narrative)

def clean_formatting(text):
    """Strip Markdown emphasis, strikethrough and code spans from ``text``.

    The text is first passed through ``clean_asterisks``. The substitutions
    below are then applied in order: emphasis and strikethrough markers are
    dropped while the wrapped text is kept, and fenced code blocks and inline
    code are deleted together with their contents.

    Args:
        text: The text to clean.

    Returns:
        The text with the formatting removed.
    """
    stripped = clean_asterisks(text)

    substitutions = (
        # Markers whose wrapped content is kept.
        (r'\*\*(.+?)\*\*', r'\1'),  # bold
        (r'\*(.+?)\*', r'\1'),      # italic
        (r'\_(.+?)\_', r'\1'),      # underscore emphasis
        (r'\~\~(.+?)\~\~', r'\1'),  # strikethrough
        # Spans that are removed entirely.
        (r'```[\s\S]*?```', ''),    # fenced code blocks
        (r'`[^`]*`', ''),           # inline code
    )
    for regex, replacement in substitutions:
        stripped = re.sub(regex, replacement, stripped)

    return stripped

def process_for_podcast(text):
    """Main function to process text for podcast narration.

    Runs the cleaning steps in a fixed order: asterisk removal, dialog-marker
    removal, Markdown/code removal, leftover-formatting removal and
    conversion to a monologue. Whitespace is then normalised and a last
    asterisk sweep is applied.

    Args:
        text: Raw text, possibly multi-line dialog with Markdown formatting.

    Returns:
        A single-line string with no asterisks and single spaces between
        words.
    """
    # Strip asterisks one line at a time. clean_asterisks() collapses all
    # whitespace, newlines included, so calling it on the whole text would
    # merge every line into one. remove_dialog_formatting() finds speaker
    # labels with line-anchored patterns and would then only see the label
    # on the very first line.
    text = '\n'.join(clean_asterisks(line) for line in text.split('\n'))
    text = remove_dialog_formatting(text)

    # NOTE: clean_formatting() calls clean_asterisks() on the whole text, so
    # the steps after it operate on a single line.
    text = clean_formatting(text)
    text = remove_breakthrough_formatting(text)
    text = convert_to_monologue(text)

    # A single \s+ pass covers spaces, tabs and newlines alike.
    text = re.sub(r'\s+', ' ', text).strip()

    # Final asterisk check before returning.
    return clean_asterisks(text)