# datagouv-french-data-analyst / tools / libreoffice_tools.py
# Last commit: f584ef2 "all modifs" (axel-darmouni)
import os
import platform
import shutil
import subprocess

from smolagents import tool
def get_libreoffice_path():
    """
    Locate the LibreOffice executable for the current operating system.

    Well-known installation locations for the detected platform are checked
    first, in order. If none of them exists, the ``soffice`` and
    ``libreoffice`` commands are looked up on ``PATH``.

    Returns:
        str: Path to LibreOffice executable or None if not found
    """
    # Candidate locations per platform.system() value, in priority order.
    known_locations = {
        "Darwin": [
            "/Applications/LibreOffice.app/Contents/MacOS/soffice",
            "/Applications/LibreOffice Developer Edition.app/Contents/MacOS/soffice",
            "/opt/homebrew/bin/soffice",  # Homebrew installation
            "/usr/local/bin/soffice",
        ],
        "Linux": [
            "/usr/bin/libreoffice",
            "/usr/bin/soffice",
            "/snap/bin/libreoffice",
            "/usr/local/bin/libreoffice",
        ],
        "Windows": [
            r"C:\Program Files\LibreOffice\program\soffice.exe",
            r"C:\Program Files (x86)\LibreOffice\program\soffice.exe",
        ],
    }

    for candidate in known_locations.get(platform.system(), []):
        if os.path.exists(candidate):
            return candidate

    # Fall back to PATH. shutil.which performs the lookup in-process, so it
    # works on every platform (the external `which` command does not exist on
    # Windows) and needs no blanket exception handler around a subprocess.
    for command in ("soffice", "libreoffice"):
        resolved = shutil.which(command)
        if resolved:
            return resolved

    return None
@tool
def convert_to_pdf_with_libreoffice(input_file: str, output_dir: str = None) -> str:
    """
    Convert a document to PDF using LibreOffice in headless mode.

    Args:
        input_file: Path to the input document
        output_dir: Directory to save the PDF (optional, defaults to same directory as input)

    Returns:
        str: Path to the generated PDF file or error message
    """
    libreoffice_path = get_libreoffice_path()
    if not libreoffice_path:
        return "LibreOffice not found. Please install LibreOffice from https://www.libreoffice.org/"

    if not os.path.exists(input_file):
        return f"Input file not found: {input_file}"

    if not output_dir:
        # os.path.dirname() yields "" for a bare file name such as
        # "report.docx"; fall back to the current directory so that
        # os.makedirs and --outdir receive a usable path.
        output_dir = os.path.dirname(input_file) or "."

    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        # Report the failure as a message, like every other error path here.
        return f"Could not create output directory {output_dir}: {e}"

    try:
        # Use LibreOffice headless mode to convert to PDF
        cmd = [
            libreoffice_path,
            '--headless',
            '--convert-to', 'pdf',
            '--outdir', output_dir,
            input_file,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
    except subprocess.TimeoutExpired:
        return "LibreOffice conversion timed out after 60 seconds"
    except Exception as e:
        return f"Error during LibreOffice conversion: {str(e)}"

    if result.returncode != 0:
        return f"LibreOffice conversion failed: {result.stderr}"

    # The output is expected under the input's base name with a .pdf
    # extension; a zero exit code alone is not taken as success, the file
    # must actually exist.
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    pdf_path = os.path.join(output_dir, f"{base_name}.pdf")
    if os.path.exists(pdf_path):
        return pdf_path
    return f"PDF conversion completed but file not found at expected location: {pdf_path}"
@tool
def check_libreoffice_availability() -> bool:
    """
    Report whether a LibreOffice executable can be located on this system.

    Returns:
        bool: True if LibreOffice is available, False otherwise
    """
    return get_libreoffice_path() is not None
@tool
def get_libreoffice_info() -> str:
    """
    Get detailed information about LibreOffice installation for troubleshooting.

    When LibreOffice cannot be located, the message contains installation
    hints for the current platform. Otherwise it contains the executable
    path and the output of ``--version``.

    Returns:
        str: Detailed information about LibreOffice availability and installation
    """
    libreoffice_path = get_libreoffice_path()

    if not libreoffice_path:
        system = platform.system()
        install_info = {
            "Darwin": "Install with: brew install libreoffice OR download from https://www.libreoffice.org/",
            "Linux": "Install with: sudo apt install libreoffice OR sudo yum install libreoffice",
            "Windows": "Download from https://www.libreoffice.org/",
        }
        return f"LibreOffice not found on {system}. {install_info.get(system, 'Install from https://www.libreoffice.org/')}"

    try:
        # Get version info
        result = subprocess.run([libreoffice_path, '--version'], capture_output=True, text=True, timeout=10)
    except (OSError, subprocess.SubprocessError):
        # Covers a missing or non-executable binary and the 10-second
        # timeout, without swallowing KeyboardInterrupt or SystemExit the
        # way a bare `except:` would.
        return f"LibreOffice found at: {libreoffice_path}\nVersion: Unable to determine"

    version_info = result.stdout.strip() if result.returncode == 0 else "Version unknown"
    return f"LibreOffice found at: {libreoffice_path}\nVersion: {version_info}"
if __name__ == "__main__":
    # Manual smoke test: print whether LibreOffice was detected.
    is_available = check_libreoffice_availability()
    print(is_available)