|
import os |
|
import tempfile |
|
import uuid |
|
import zipfile |
|
import shutil |
|
from lxml import etree |
|
import gradio as gr |
|
from concurrent.futures import ThreadPoolExecutor |
|
from functools import lru_cache |
|
|
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
|
|
|
|
|
|
# Identifier of the hosted model that every translation request is sent to.
llama_model_id = "google/gemma-2-27b-it"

# The API token comes from the environment; loading the module fails fast
# when it is missing or empty.
hf_token = os.environ.get("HF_TOKEN")
if hf_token is None or hf_token == "":
    raise ValueError("Hugging Face token not found. Please set it in the environment.")

# Shared inference client used by the translation helpers below.
client = InferenceClient(token=hf_token)
client.model = llama_model_id
|
|
|
|
|
|
|
|
|
|
|
# Display name -> two-letter language code, listed in code order.
languages = dict(
    (
        ("Arabic", "ar"),
        ("Bulgarian", "bg"),
        ("Catalan", "ca"),
        ("Czech", "cs"),
        ("German", "de"),
        ("English", "en"),
        ("Spanish", "es"),
        ("Estonian", "et"),
        ("Finnish", "fi"),
        ("French", "fr"),
        ("Hungarian", "hu"),
        ("Italian", "it"),
        ("Lithuanian", "lt"),
        ("Latvian", "lv"),
        ("Dutch", "nl"),
        ("Polish", "pl"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Slovak", "sk"),
        ("Slovenian", "sl"),
        ("Swedish", "sv"),
        ("Turkish", "tr"),
        ("Ukrainian", "uk"),
        ("Vietnamese", "vi"),
    )
)

# (name, code) pairs ordered alphabetically by display name.
sorted_languages = sorted(languages.items())
|
|
|
|
|
|
|
|
|
|
|
def single_translate(target_lang, text):
    """Translate one piece of text with the shared inference client.

    Args:
        target_lang: Language code; it is looked up among the values of the
            module-level ``languages`` mapping to recover the language name.
            Codes that are not found fall back to ``"Arabic"``.
        text: The text to translate; it is embedded verbatim in the prompt.

    Returns:
        The model output with surrounding whitespace stripped. If the call
        raises, the string ``"Translation error: <exception>"`` is returned
        instead of propagating the exception.
    """
    # Invert name -> code into code -> name for the lookup.
    code_to_name = dict(zip(languages.values(), languages.keys()))
    language = code_to_name.get(target_lang, "Arabic")

    prompt = (
        "You are a professional Translator: don't say a greeting or anything "
        "just jump into the translation: Make sure you don't output anything "
        f"else other than the {language} translation requested for the "
        f"upcoming text:{text} also dont use curly brackets or such just the "
        "translation."
    )

    try:
        output = client.text_generation(prompt)

        # Plain string response: use it directly.
        if isinstance(output, str):
            return output.strip()

        # List-of-dicts response: take the first item's generated text.
        if isinstance(output, list) and len(output) >= 1:
            head = output[0]
            if isinstance(head, dict) and "generated_text" in head:
                return head["generated_text"].strip()

        # Any other shape is stringified as a last resort.
        return str(output).strip()
    except Exception as e:
        return f"Translation error: {e}"
|
|
|
|
|
|
|
|
|
|
|
@lru_cache(maxsize=10000)
def translate_texts(target_lang, texts):
    """Translate several texts concurrently, keeping the input order.

    The result is memoised on the ``(target_lang, texts)`` pair, so both
    arguments must be hashable (pass ``texts`` as a tuple). On a cache hit
    the very same list object is returned again.

    Args:
        target_lang: Language code forwarded to ``single_translate``.
        texts: Hashable iterable of strings to translate.

    Returns:
        A list with one translation per entry of ``texts``, in the same order.
    """
    with ThreadPoolExecutor(max_workers=8) as pool:
        # Submit everything first so the requests overlap, then collect the
        # results in submission order.
        pending = [pool.submit(single_translate, target_lang, item) for item in texts]
        return [job.result() for job in pending]
|
|
|
|
|
|
|
|
|
|
|
def get_slide_size_from_presentation(xml_root):
    """Return the slide size declared in a presentation part as ``(width, height)``.

    The size is read from the ``cx``/``cy`` attributes of the first
    ``p:sldSz`` child of a ``p:presentation`` element found in ``xml_root``
    (the element itself or any of its descendants). Values are in EMU.

    Args:
        xml_root: Root element of ``presentation.xml``. Any ElementTree-style
            element providing ``iter``, ``find`` and ``get`` works (lxml or
            the standard library).

    Returns:
        ``(width, height)`` as integers. When the element or either attribute
        is missing, empty or not an integer, the fallback
        ``(9144000, 6400800)`` is returned (10 x 7 inches at 914400 EMU per
        inch).
    """
    fallback_size = (914400 * 10, 914400 * 7)
    ns = "http://schemas.openxmlformats.org/presentationml/2006/main"
    presentation_tag = f"{{{ns}}}presentation"
    size_tag = f"{{{ns}}}sldSz"

    # Only the first matching sldSz is considered.
    sld_sz = None
    for presentation in xml_root.iter(presentation_tag):
        sld_sz = presentation.find(size_tag)
        if sld_sz is not None:
            break
    if sld_sz is None:
        return fallback_size

    cx_attr = sld_sz.get("cx")
    cy_attr = sld_sz.get("cy")
    if not cx_attr or not cy_attr:
        return fallback_size

    try:
        return (int(cx_attr), int(cy_attr))
    except ValueError:
        # Non-numeric attribute: use the fallback rather than fail.
        return fallback_size
|
|
|
def mirror_x_around_center(off_x, ext_cx, slide_width):
    """Reflect a shape horizontally about the slide's vertical centre line.

    Args:
        off_x: Current x offset of the shape's left edge.
        ext_cx: Width of the shape.
        slide_width: Width of the slide, in the same unit.

    Returns:
        The new left-edge x offset, truncated to an integer.
    """
    half_extent = ext_cx / 2.0
    axis = slide_width / 2.0

    # The mirrored centre lies as far from the axis as the current centre,
    # on the opposite side.
    offset_from_axis = (off_x + half_extent) - axis
    mirrored_left = (axis - offset_from_axis) - half_extent
    return int(mirrored_left)
|
|
|
def reflect_translate_align_xml(xml_path, slide_width_emu, target_lang, do_mirroring=True, do_text_translation=True, do_right_align=True):
    """Mirror shapes, translate text runs and right-align paragraphs in one XML part.

    The file at ``xml_path`` is parsed, modified in memory and written back
    in place only when at least one change was made.

    Args:
        xml_path: Path of the slide / layout / master XML file to rewrite.
        slide_width_emu: Slide width used as the mirroring reference.
        target_lang: Language code forwarded to ``translate_texts``.
        do_mirroring: Mirror the x offset of every ``p:spPr/a:xfrm``.
        do_text_translation: Replace the text of every non-blank ``a:t`` run
            with its translation.
        do_right_align: Set ``algn="r"`` on every existing ``a:pPr`` that is
            a direct child of ``a:p``; paragraphs without one are untouched.

    Returns:
        ``True`` when the file was modified and rewritten, else ``False``.
    """
    nsmap = {
        "p": "http://schemas.openxmlformats.org/presentationml/2006/main",
        "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    }
    tree = etree.parse(xml_path)
    root_elem = tree.getroot()
    changed = False

    if do_mirroring:
        # ".//p:spPr/a:xfrm" already matches the transforms of pictures and
        # connectors, so no separate p:pic / p:cxnSp paths are needed.
        for xfrm in root_elem.xpath(".//p:spPr/a:xfrm", namespaces=nsmap):
            off_elem = xfrm.find("a:off", nsmap)
            ext_elem = xfrm.find("a:ext", nsmap)
            if off_elem is None or ext_elem is None:
                continue
            try:
                off_x = int(off_elem.get("x", "0"))
                ext_cx = int(ext_elem.get("cx", "0"))
                new_off_x = mirror_x_around_center(off_x, ext_cx, slide_width_emu)
            except (ValueError, OverflowError):
                # Non-numeric or absurdly large attribute: leave this shape
                # where it is instead of aborting the whole file.
                continue
            off_elem.set("x", str(new_off_x))
            changed = True

    if do_text_translation:
        # Pair each non-blank run with its stripped text.
        runs = []
        for a_t in root_elem.xpath(".//a:t", namespaces=nsmap):
            core = a_t.text.strip() if a_t.text else ""
            if core:
                runs.append((a_t, core))

        # De-duplicate in first-appearance order so the tuple handed to
        # translate_texts is reproducible for the same file.
        unique_texts = tuple(dict.fromkeys(core for _, core in runs))
        if unique_texts:
            translations = translate_texts(target_lang, unique_texts)
            translation_map = dict(zip(unique_texts, translations))
            for a_t, core in runs:
                translated = translation_map.get(core)
                if translated is None:
                    continue
                raw = a_t.text
                # Keep the run's own leading/trailing whitespace: it is what
                # separates this run from its neighbours in the paragraph.
                leading = raw[: len(raw) - len(raw.lstrip())]
                trailing = raw[len(raw.rstrip()):]
                a_t.text = f"{leading}{translated}{trailing}"
                changed = True

    if do_right_align:
        for pPr in root_elem.xpath(".//a:p/a:pPr", namespaces=nsmap):
            pPr.set("algn", "r")
            changed = True

    if changed:
        tree.write(xml_path, xml_declaration=True, encoding="UTF-8", standalone="yes")
    return changed
|
|
|
def _blacken_empty_scheme_colors(xml_path):
    """Replace ``a:schemeClr`` elements with no usable ``val`` by black ``a:srgbClr``.

    An element counts as unusable when ``val`` is missing, empty or equal to
    ``"none"`` (case-insensitive). The file is rewritten only if something
    was replaced.
    """
    drawingml_ns = "http://schemas.openxmlformats.org/drawingml/2006/main"
    tree = etree.parse(xml_path)
    scheme_colors = tree.getroot().xpath(".//a:schemeClr", namespaces={"a": drawingml_ns})

    replaced = False
    for scheme_color in scheme_colors:
        val_attr = scheme_color.get("val")
        if not val_attr or val_attr.lower() == "none":
            srgb_elem = etree.Element(f"{{{drawingml_ns}}}srgbClr")
            srgb_elem.set("val", "000000")
            scheme_color.getparent().replace(scheme_color, srgb_elem)
            replaced = True

    if replaced:
        tree.write(xml_path, xml_declaration=True, encoding="UTF-8", standalone="yes")


def direct_raw_translate_reflect_pptx_with_progress(pptx_path, target_lang, do_mirroring):
    """Process a PPTX at the raw-XML level, yielding progress as it goes.

    The archive is unpacked into a private temporary directory; every XML
    file under ``ppt/slideMasters``, ``ppt/slideLayouts`` and ``ppt/slides``
    is passed to ``reflect_translate_align_xml``; unusable scheme colours are
    replaced in all XML files under ``ppt``; and the tree is zipped into a
    new ``translated_<hex>.pptx`` in the system temp directory. The working
    directory is always removed, whether processing succeeds, fails or the
    generator is closed early.

    Args:
        pptx_path: Path of the source ``.pptx`` file.
        target_lang: Language code used for translation.
        do_mirroring: Enables both shape mirroring and right alignment.

    Yields:
        ``(output_path, status)`` tuples. ``output_path`` is ``None`` for
        every progress and error message and holds the path of the new file
        only in the final success tuple.
    """
    try:
        # mkdtemp creates a fresh directory that only this user can access.
        temp_dir = tempfile.mkdtemp(prefix="raw_")
        try:
            yield (None, "10% - Extracting PPTX contents...")

            with zipfile.ZipFile(pptx_path, "r") as z_in:
                z_in.extractall(temp_dir)

            # Default width applies when presentation.xml is absent.
            slide_width_emu = 9144000
            presentation_xml = os.path.join(temp_dir, "ppt", "presentation.xml")
            if os.path.exists(presentation_xml):
                pres_root = etree.parse(presentation_xml).getroot()
                slide_width_emu, _height = get_slide_size_from_presentation(pres_root)
            yield (None, "20% - Retrieved slide size.")

            xml_files = []
            for sub_dir in ("slideMasters", "slideLayouts", "slides"):
                dir_path = os.path.join(temp_dir, "ppt", sub_dir)
                if os.path.isdir(dir_path):
                    xml_files.extend(
                        os.path.join(dir_path, fname)
                        for fname in os.listdir(dir_path)
                        if fname.endswith(".xml")
                    )
            total_files = len(xml_files)
            yield (None, f"30% - Found {total_files} XML files to process.")

            def process_xml(path):
                # Right alignment follows the mirroring switch.
                return reflect_translate_align_xml(
                    path, slide_width_emu, target_lang, do_mirroring, do_right_align=do_mirroring
                )

            with ThreadPoolExecutor() as executor:
                for idx, _changed in enumerate(executor.map(process_xml, xml_files), start=1):
                    progress_percent = 30 + (50 * idx / total_files)
                    yield (None, f"{int(progress_percent)}% - Processed {idx}/{total_files} XML files.")

            yield (None, "80% - Fixing color schemes...")

            ppt_dir = os.path.join(temp_dir, "ppt")
            for root_dir, _dirs, files in os.walk(ppt_dir):
                for file in files:
                    if file.endswith(".xml"):
                        _blacken_empty_scheme_colors(os.path.join(root_dir, file))

            yield (None, "90% - Re-zipping the PPTX file...")

            translated_path = os.path.join(tempfile.gettempdir(), f"translated_{uuid.uuid4().hex}.pptx")
            with zipfile.ZipFile(translated_path, "w", zipfile.ZIP_DEFLATED) as z_out:
                for root_dir, _dirs, files in os.walk(temp_dir):
                    for file in files:
                        full_path = os.path.join(root_dir, file)
                        z_out.write(full_path, arcname=os.path.relpath(full_path, temp_dir))
        finally:
            # Runs on success, on failure and when the generator is closed,
            # so extracted files never pile up in the temp directory.
            shutil.rmtree(temp_dir, ignore_errors=True)

        yield (translated_path, "✅ Processing complete.")
    except Exception as e:
        yield (None, f"❌ Error: {e}")
|
|
|
def raw_xml_reflect_and_translate(file, target_lang, do_mirroring):
    """Entry point wired to the UI button: validate the upload, then process it.

    Args:
        file: Path of the uploaded PPTX; a falsy value means nothing was
            uploaded.
        target_lang: Language code chosen in the UI.
        do_mirroring: Whether shapes should be mirrored.

    Yields:
        ``(output_path, status)`` tuples relayed from
        ``direct_raw_translate_reflect_pptx_with_progress``, or a single
        error tuple when no file was given or the delegate raised.
    """
    if not file:
        yield (None, "❌ No file uploaded.")
        return

    try:
        yield from direct_raw_translate_reflect_pptx_with_progress(file, target_lang, do_mirroring)
    except Exception as e:
        # Safety net for anything that escapes the delegate.
        yield (None, f"❌ Error: {e}")
|
|
|
|
|
|
|
|
|
|
|
# Build the single-page UI: intro text, inputs, action button, outputs.
with gr.Blocks() as demo:
    # Header and feature overview.
    gr.Markdown("<h1>tarGEM π</h1>")
    gr.Markdown("""
    <h3>PowerPoint Reflection & Translation Tool</h3>
    <p>Enhance your presentations with mirrored layouts, translated text, and aligned bullet points.</p>
    <ul>
        <li>π <strong>Mirror</strong> shapes for balanced designs</li>
        <li>π <strong>Translate</strong> text seamlessly</li>
        <li>π <strong>Align</strong> to the right using RTL checkbox</li>
        <li>π¨ <strong>Keep your original</strong> font style, slide background and color schemes</li>
    </ul>
    """)

    # Inputs: the deck, the target language and the mirroring switch.
    ppt_input = gr.File(label="π Upload PPTX File", file_types=[".pptx"], type="filepath")
    language_dropdown = gr.Dropdown(
        label="π Select Target Language",
        choices=list(sorted_languages),  # (display name, code) pairs
        value="ar",
        type="value",
        interactive=True,
    )
    mirror_checkbox = gr.Checkbox(label="π Mirror Shapes for RTL", value=True, interactive=True)

    reflect_button = gr.Button("Process PPTX")

    # Outputs: the processed file and a running status line.
    output_file = gr.File(label="π Download Processed PPTX", interactive=False)
    status_box = gr.Textbox(label="Status", interactive=False)

    # The handler is a generator, so each yielded tuple refreshes both outputs.
    reflect_button.click(
        raw_xml_reflect_and_translate,
        [ppt_input, language_dropdown, mirror_checkbox],
        [output_file, status_box],
        queue=True,
    )

    gr.Markdown("""
    <p style="text-align: center; margin-top: 20px;">
        π Thank you for using tarGEM!
    </p>
    """)

demo.launch(share=True)
|
|