LATEX_RETR / app.py
AkshitShubham's picture
Update app.py
9c0fd54 verified
raw
history blame
11.7 kB
import gradio as gr
import requests
import base64
import json
from PIL import Image
import io
import os
import re
from datetime import datetime
# Global counter for serial numbering of fallback output filenames
# (formatted as equation_001, equation_002, ...). It starts at 1, is advanced
# by the command-generating functions in this file, and is never reset here.
equation_counter = 1
def clean_latex_for_cli(latex_text):
    """Clean LaTeX and escape it for use inside a double-quoted CLI argument.

    Markdown code-fence markers (with or without the ``latex`` language tag)
    are removed, then backslashes and double quotes are escaped.

    Args:
        latex_text: LaTeX source, possibly wrapped in Markdown code fences.

    Returns:
        The text with fence markers removed, every backslash doubled and
        every double quote preceded by exactly one backslash.
    """
    # Remove the tagged opening fence first so the generic pattern below only
    # has bare fences left to strip.
    latex_text = re.sub(r'```latex\n?', '', latex_text)
    latex_text = re.sub(r'```\n?', '', latex_text)
    # Escape backslashes BEFORE quotes. Doing it the other way round doubles
    # the backslash that was just added in front of each quote, which leaves
    # the quote itself unescaped and terminates the CLI argument early.
    latex_text = latex_text.replace('\\', '\\\\')
    latex_text = latex_text.replace('"', '\\"')
    return latex_text
def extract_equations_and_generate_commands(api_response):
    """Turn each LaTeX-looking line of a response into a maths2svg command.

    A line qualifies when, after stripping, it is non-empty, contains a
    backslash and does not start with '#'. A leading bold Markdown label and
    leading heading hashes are removed before the command is built. Every
    generated command consumes one value of the module-level
    ``equation_counter`` for its serial output filename.

    Args:
        api_response: Raw text to scan, with lines separated by newlines.

    Returns:
        List of command strings, in the order the lines were found.
    """
    global equation_counter
    generated = []
    for raw_line in api_response.split('\n'):
        candidate = raw_line.strip()
        # Skip blanks, lines without any LaTeX backslash, and headings.
        if not candidate or '\\' not in candidate or candidate.startswith('#'):
            continue
        # Peel off Markdown artifacts: a "**label**:" prefix, then any
        # leading '#' run that the first substitution may have exposed.
        without_label = re.sub(r'^\*\*.*?\*\*:?\s*', '', candidate)
        candidate = re.sub(r'^#+\s*', '', without_label)
        if not candidate.strip():
            continue
        output_stem = f"equation_{equation_counter:03d}"
        escaped = clean_latex_for_cli(candidate)
        generated.append(
            f'maths2svg --latex "{escaped}" --output {output_stem}.svg'
        )
        equation_counter += 1
    return generated
def get_filename_suggestions(latex_equations):
    """Ask Gemini to suggest meaningful filenames for equations.

    This is a best-effort helper: it never raises for ordinary failures and
    returns an empty list instead, so callers can fall back to their own
    naming scheme.

    Args:
        latex_equations: Sequence of LaTeX strings, one per equation.

    Returns:
        The non-blank lines of the model's reply, stripped, in reply order.
        An empty list when there are no equations, when ``GEMINI_API_KEY`` is
        not set, when the API does not answer with HTTP 200, when the reply
        has no candidates, or when any error occurs along the way.
    """
    if not latex_equations:
        # Nothing to name; skip the network round-trip entirely.
        return []
    try:
        api_key = os.getenv('GEMINI_API_KEY')
        if not api_key:
            return []
        url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
        # The key travels in a header rather than the query string so it does
        # not end up in URLs that get logged or echoed in error messages.
        headers = {
            'Content-Type': 'application/json',
            'x-goog-api-key': api_key,
        }
        equations_text = "\n".join(latex_equations)
        payload = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": f"""Given these LaTeX equations:
{equations_text}
Please suggest short, descriptive filenames (without extension) for each equation. The filenames should:
- Be descriptive of what the equation represents
- Use underscores instead of spaces
- Be concise (max 20 characters)
- Use mathematical terminology when appropriate
Format your response as a simple list, one filename per line, in the same order as the equations.
If you can't determine a meaningful name, suggest a generic mathematical term.
Example format:
quadratic_formula
pythagorean_theorem
area_circle"""
                        }
                    ]
                }
            ]
        }
        # Without a timeout a stalled connection would block the caller
        # indefinitely.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
        if response.status_code != 200:
            return []
        result = response.json()
        if 'candidates' not in result or len(result['candidates']) == 0:
            return []
        reply = result['candidates'][0]['content']['parts'][0]['text']
        return [name.strip() for name in reply.strip().split('\n') if name.strip()]
    except Exception:
        # Deliberate best-effort: suggestions are optional. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit propagate.
        return []
def image_to_latex(image):
    """
    Convert an image containing mathematical equations to maths2svg commands.

    The image is PNG-encoded and sent to the Gemini API together with a
    prompt asking for one raw LaTeX equation per line. Every reply line that
    contains a backslash is treated as an equation and turned into a
    ``maths2svg`` command. Output filenames come from
    ``get_filename_suggestions`` when a suggestion exists for that position;
    otherwise a serial name based on the module-level ``equation_counter`` is
    used.

    Args:
        image: Object with a PIL-style ``save(fp, format=...)`` method, or
            ``None`` when nothing has been uploaded.

    Returns:
        A string: the generated commands joined by newlines on success, or a
        human-readable message when there is no image, no API key, an API
        error, no candidates, no equations, or an unexpected exception.
    """
    global equation_counter
    if image is None:
        return "Please upload an image first."
    try:
        # Convert PIL Image to base64
        buffered = io.BytesIO()
        image.save(buffered, format="PNG")
        img_base64 = base64.b64encode(buffered.getvalue()).decode()
        # Get API key from environment variable
        api_key = os.getenv('GEMINI_API_KEY')
        if not api_key:
            return "Error: GEMINI_API_KEY environment variable not set. Please set your Google AI API key."
        # Prepare the request to Gemini API. The key is sent as a header, not
        # as a URL query parameter: exception messages raised by the HTTP
        # client can include the request URL, and the except clause below
        # returns that message to the user.
        url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"
        headers = {
            'Content-Type': 'application/json',
            'x-goog-api-key': api_key,
        }
        # Construct the payload with image and text prompt
        payload = {
            "contents": [
                {
                    "parts": [
                        {
                            "text": """Please analyze this image and convert any mathematical equations, formulas, or mathematical text you find into proper LaTeX format.
Instructions:
- Extract each mathematical equation or formula separately
- Use proper LaTeX syntax for mathematical expressions
- Output raw LaTeX without any $ delimiters
- Put each equation on a separate line
- Do not include any markdown formatting or explanatory text
- Only output the raw LaTeX code for each equation
- If there are multiple equations, list them one per line
Example output format:
\\frac{a}{b} = c
x^2 + y^2 = z^2
\\int_0^1 x dx = \\frac{1}{2}"""
                        },
                        {
                            "inline_data": {
                                "mime_type": "image/png",
                                "data": img_base64
                            }
                        }
                    ]
                }
            ]
        }
        # Make the API request; the timeout keeps a stalled connection from
        # blocking the handler forever.
        response = requests.post(url, headers=headers, json=payload, timeout=60)
        if response.status_code != 200:
            return f"API Error: {response.status_code} - {response.text}"
        result = response.json()
        if 'candidates' not in result or len(result['candidates']) == 0:
            return "No response generated from the API."
        # Extract the generated text
        latex_output = result['candidates'][0]['content']['parts'][0]['text'].strip()
        # A line counts as an equation when it contains a backslash (which
        # also implies it is non-empty).
        stripped_lines = (line.strip() for line in latex_output.split('\n'))
        equations = [line for line in stripped_lines if '\\' in line]
        if not equations:
            return "No mathematical equations found in the image."
        # Get filename suggestions from Gemini
        filename_suggestions = get_filename_suggestions(equations)
        # Generate maths2svg commands
        commands = []
        for i, equation in enumerate(equations):
            # Clean LaTeX for CLI
            clean_latex = clean_latex_for_cli(equation)
            # Use suggested filename or fall back to serial numbering
            if i < len(filename_suggestions) and filename_suggestions[i]:
                filename = filename_suggestions[i]
            else:
                filename = f"equation_{equation_counter:03d}"
                # Advance only when a serial name was consumed so fallback
                # names stay contiguous.
                equation_counter += 1
            commands.append(
                f'maths2svg --latex "{clean_latex}" --output {filename}.svg'
            )
        # Format the final output - just the commands
        return "\n".join(commands)
    except Exception as e:
        return f"Error processing image: {str(e)}"
def create_app():
    """Create and configure the Gradio interface.

    Builds a two-column Blocks layout: an image upload with a convert button
    on the left and a textbox for the generated commands on the right,
    followed by a tips section and an example command. Both the button click
    and a change of the uploaded image invoke ``image_to_latex``.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    # Custom CSS for better styling
    css = """
.gradio-container {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
.output-latex {
font-family: 'Courier New', monospace;
background-color: #f8f9fa;
border: 1px solid #dee2e6;
border-radius: 4px;
padding: 10px;
}
"""
    with gr.Blocks(css=css, title="Image to maths2svg Converter") as app:
        # Headings use real emoji characters; the previous text contained
        # mis-decoded UTF-8 byte sequences that rendered as garbage.
        gr.Markdown(
            """
# 📸 Image to maths2svg Command Generator
Upload an image containing mathematical equations and get ready-to-use `maths2svg` commands
with intelligent filename suggestions.
**Features:**
- Extracts multiple equations from images
- Generates proper LaTeX syntax
- Creates maths2svg CLI commands
- Suggests meaningful filenames using AI
- Falls back to serial numbering if needed
**Supported content:**
- Mathematical equations and formulas
- Mathematical symbols and expressions
- Multiple equations in one image
**Note:** Make sure to set your `GEMINI_API_KEY` environment variable before running this app.
"""
        )
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(
                    label="Upload Image with Math Equations",
                    type="pil",
                    height=400
                )
                convert_btn = gr.Button(
                    "Generate maths2svg Commands",
                    variant="primary",
                    size="lg"
                )
            with gr.Column(scale=1):
                latex_output = gr.Textbox(
                    label="maths2svg Commands",
                    lines=20,
                    max_lines=30,
                    placeholder="Generated maths2svg commands will appear here...",
                    elem_classes=["output-latex"]
                )
        # Tips section
        gr.Markdown("### 📝 Tips for best results:")
        gr.Markdown(
            """
- Use clear, high-contrast images
- Ensure mathematical expressions are clearly visible
- Multiple equations in one image are supported
- Handwritten or printed equations both work
- The app will generate meaningful filenames automatically
"""
        )
        # Example section
        gr.Markdown("### 🎯 Example Output Format:")
        gr.Code(
            """maths2svg --latex "h = h_0 + \\frac{2\\sigma \\cos\\theta}{\\rho g r}" --output capillary_rise.svg"""
        )
        # Set up the conversion action
        convert_btn.click(
            fn=image_to_latex,
            inputs=[image_input],
            outputs=[latex_output]
        )
        # Also allow conversion when image is uploaded
        image_input.change(
            fn=image_to_latex,
            inputs=[image_input],
            outputs=[latex_output]
        )
    return app
if __name__ == "__main__":
    # Build the interface, then start the server with explicit settings.
    app = create_app()
    launch_settings = {
        "share": False,  # Set to True if you want to create a public link
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    app.launch(**launch_settings)