# Spaces: devwildlifeai / wildlife_watcher_annotation_app_deva (Sleeping)
#
# File size: 9,077 Bytes

"""
TOC:
0) IMPORTS 
1) METADATA
2) UPLOAD
3) ANNOTATIONS
-1) MAIN
"""

# gradio run.py --demo-name=my_demo

##################################################
# 0) IMPORTS
##################################################

# baselayer
import os
from io import BytesIO
import argparse

# web
import gradio as gr

# image processing
from tkinter import Tk, filedialog
from pathlib import Path
from PIL import Image, ExifTags
from PIL.ExifTags import TAGS

# data science
import numpy as np
import pandas as pd

# export
import csv


# from transformers import AutoImageProcessor, AutoModelForImageClassification
# import torch
# Load model
# processor = AutoImageProcessor.from_pretrained("victor/animals-classifier")
# model = AutoModelForImageClassification.from_pretrained("victor/animals-classifier")
# model.eval()

##################################################
# 1) METADATA
##################################################


# this one works with PIL but we don't get all the metadata
def decode_utf16_little_endian(binary_data):
    """Decode UTF-16 little-endian bytes and drop trailing NUL characters.

    Input: object exposing ``decode`` (typically ``bytes``)
    Output: the decoded text, or the placeholder ``"Encoded"`` when decoding
    fails for any reason.

    NOTE: a function with the same name is defined again later in this file,
    which rebinds the name once the module has been fully loaded.
    """
    try:
        return binary_data.decode("utf-16-le").rstrip("\x00")
    except Exception:
        # Any failure (bad byte sequence, non-bytes input, ...) maps to the
        # same placeholder.
        return "Encoded"


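# NOTE: the string literal below holds a disabled, earlier draft of get_exif.
# It is never executed; the active implementation is defined further down.
'''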
def get_exif(list_file_paths):
    metadata_all_file = {}
    df = pd.DataFrame()
    for file_path in list_file_paths:
        metadata = {}
        metadata["name"] = file_path.split("/")[-1]
        print(file_path)
        try:
            image = Image.open(file_path)
            exifdata = image._getexif()
            if exifdata is not None:
                print(len(exifdata.items()))
                for tagid, value in exifdata.items():
                    # print(tagid, value)
                    # print(f"Value:{value}")
                    tagname = str(TAGS.get(tagid, tagid))
                    # value = exifdata.get(tagid)
                    # Handle binary data
                    if isinstance(value, bytes):
                        # print(f"Value bytes {value}")
                        # print(f"Value bytes {type(value)}")
                        # print(f"Value str {decode_utf16_little_endian(value)}")
                        value = decode_utf16_little_endian(value)
                    print(tagname)
                    print(type(tagname))
                    print(value)
                    if type(tagname) is not str:
                        print(">>>>>>>>>>>> here " + type(tagname))
                        try:
                            metadata[str(tagname)] = value
                        except:
                            try:
                                metadata[repr(tagname)] = value
                            except:
                                pass
                    else:
                        metadata[tagname] = value
                    """
                    for key in metadata.keys():
                        if type(key) is not str:
                            try:
                                metadata[str(key)] = metadata[key]
                            except:
                                try:
                                    metadata[repr(key)] = metadata[key]
                                except:
                                    pass
                            del metadata[key]
                    """
                    # print(f"\t{metadata}")
                print(metadata)
                print(pd.DataFrame([metadata]))
                df = pd.concat([df, pd.DataFrame([metadata])], ignore_index=True)
                # new_row = {"name": file_path, **metadata}
                # df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
                # metadata_all_file[file_path] = metadata
            else:
                return "No EXIF metadata found."
        except Exception as e:
            return f"Error : {e}"
        print(pd.concat([df, pd.DataFrame([metadata])], ignore_index=True))
    print(f"FINAL DF \n \n \n {df}")
    return df
'''
import pandas as pd
from PIL import Image
from PIL.ExifTags import TAGS


def decode_utf16_little_endian(value):
    """Decode a UTF-16 little-endian byte string into clean text.

    Args:
        value: The raw value to decode, normally ``bytes``.

    Returns:
        The decoded text with surrounding whitespace and trailing NUL
        terminators removed. If ``value`` cannot be decoded (invalid byte
        sequence, or an object without ``decode``), ``value`` is returned
        unchanged.
    """
    try:
        text = value.decode("utf-16le")
    except (UnicodeDecodeError, AttributeError):
        # Fallback to the original value if decoding fails. Only decoding
        # problems are handled, so KeyboardInterrupt/SystemExit still propagate.
        return value
    # str.strip() does not treat "\x00" as whitespace, so NUL terminators are
    # removed explicitly, then any whitespace they were hiding.
    return text.strip().rstrip("\x00").rstrip()


def extract_particular_value_from_exif_file(metadata, tagname, value):
    """Placeholder: not implemented yet.

    Accepts ``metadata``, ``tagname`` and ``value`` but currently ignores all
    three and returns ``None``.
    """
    return None


def get_exif(list_file_paths):
    """Collect the EXIF metadata of several image files into one DataFrame.

    Every file that yields EXIF data contributes one row: a ``name`` column
    with the file's base name plus one column per EXIF tag (tag ids without a
    known name are used as their decimal string). ``bytes`` values are passed
    through ``decode_utf16_little_endian`` and nested dictionaries are stored
    as their ``str()`` representation so each cell holds a flat value.

    Files that cannot be processed, or that carry no EXIF data, are reported
    on stdout and skipped; they never abort the whole batch.

    Args:
        list_file_paths: Iterable of image file paths.

    Returns:
        pandas.DataFrame with one row per file that had EXIF data. The frame
        is empty when no file produced metadata.
    """
    rows = []

    for file_path in list_file_paths:
        print(file_path)
        # basename copes with the platform's path separators, unlike
        # splitting on "/" by hand.
        metadata = {"name": os.path.basename(file_path)}

        try:
            # The context manager releases the file handle even if reading
            # the EXIF block fails.
            with Image.open(file_path) as image:
                # _getexif is only defined by some image plugins; a format
                # without it simply has no EXIF data to report.
                read_exif = getattr(image, "_getexif", None)
                exifdata = read_exif() if callable(read_exif) else None

            if exifdata is None:
                print(f"No EXIF metadata found for {file_path}")
                continue

            for tagid, value in exifdata.items():
                tagname = TAGS.get(tagid, str(tagid))  # Always a string key
                if isinstance(value, bytes):
                    value = decode_utf16_little_endian(value)
                if isinstance(value, dict):
                    # Nested IFDs are flattened to text so the cell stays scalar.
                    value = str(value)
                metadata[tagname] = value

            rows.append(metadata)

        except Exception as e:
            # Best-effort per file: one unreadable upload must not lose the
            # metadata of the others.
            print(f"Error processing {file_path}: {e}")

    # Build the frame once instead of concatenating inside the loop, which
    # copies all previous rows on every iteration.
    df = pd.DataFrame(rows)
    print(f"FINAL DF:\n{df}")
    return df


##################################################
# 2) UPLOAD
##################################################


def get_file_names(files_):
    """Return the path of every uploaded file, in upload order.

    Args:
        files_: Uploaded files. Each item may be a path string / path-like
            object, or an object exposing its path through a ``name``
            attribute. ``None`` or an empty collection means "no files".
            NOTE(review): which of these shapes the upload component delivers
            depends on the installed Gradio version — confirm.

    Returns:
        list: one path per uploaded file, e.g.
        ['path of file 1', 'path of file 2']. The ``name`` attribute is
        returned as-is (it is not shortened to a base name).
    """
    if not files_:
        return []
    return [
        os.fspath(file) if isinstance(file, (str, os.PathLike)) else file.name
        for file in files_
    ]


##################################################
# 3) ANNOTATIONS
##################################################


def get_annotation(files_):
    """
    Build the results table for the uploaded files.
    For now this is the EXIF metadata only: the uploads are resolved with
    get_file_names and handed to get_exif. No model label/accuracy is
    computed here yet.
    Input: Uploaded files
    Output: Df returned by get_exif
    """
    # TODO: add the model columns (file_name | label | accuracy)
    return get_exif(get_file_names(files_))


def update_dataframe(df):
    """
    Pass-through callback: hands back the dataframe it was given
    Input: DF
    Output: the same DF, unchanged
    """
    unchanged = df
    return unchanged


def df_to_csv(df_, encodings=None):
    """
    Write the df to a CSV file and wrap it as a gradio file output ready for
    download.

    The file is always named "output.csv" but lives in a directory created
    for this call, so simultaneous exports cannot overwrite each other.

    Input:
        df_: DF to export
        encodings: encodings to try, in order; the first one that can
            represent the data is used. Defaults to
            ["utf-8", "utf-8-sig", "latin1", "iso-8859-1", "cp1252"].
    Output: gr.File() pointing at the written CSV, or None when the df could
        not be written with any of the encodings.
    """
    import tempfile  # local import: only this function needs it

    if encodings is None:
        encodings = ["utf-8", "utf-8-sig", "latin1", "iso-8859-1", "cp1252"]

    # One private directory per export keeps the "output.csv" download name
    # while avoiding a shared file in the working directory.
    csv_path = os.path.join(tempfile.mkdtemp(prefix="annotation_export_"), "output.csv")

    for encoding in encodings:
        try:
            df_.to_csv(csv_path, encoding=encoding, index=False)
        except Exception as e:
            # Best-effort: report and fall through to the next encoding.
            print(f"Failed with encoding {encoding}: {e}")
        else:
            return gr.File(value=csv_path, visible=True)

    # Every encoding failed; nothing to offer for download.
    return None


##################################################
# -1) MAIN
##################################################


def process_files(files_):
    """
    Entry point used by the upload button
    - Receives the uploaded files
    - Delegates to get_annotation to build the corresponding df
    - Returns that df
    (annotations and csv output are not produced here) # TODO
    """
    return get_annotation(files_)


# Build the Gradio UI and wire the buttons to the callbacks defined above.
with gr.Blocks() as interface:
    gr.Markdown("# Wildlife.ai Annotation tools")
    # Upload data: one row holding the upload button and the three action buttons
    with gr.Row():
        # Accepts several image and/or video files per upload
        upload_btn = gr.UploadButton(
            "Upload raw data",
            file_types=["image", "video"],
            file_count="multiple",
        )
        update_btn = gr.Button("Modify raw data")
        download_raw_btn = gr.Button("Generate raw data as csv")
        download_modified_btn = gr.Button("Generate new data as a csv")
    # Get results: table filled by the upload callback (created with interactive=False)
    gr.Markdown("## Results")
    df = gr.DataFrame(interactive=False)
    # Export the raw table; the File output is created hidden here and
    # df_to_csv returns a gr.File with visible=True on success
    download_raw_btn.click(
        fn=df_to_csv,
        inputs=[df],
        outputs=gr.File(visible=False),
    )
    # Second table (created with interactive=True), filled from the raw table by update_btn
    gr.Markdown("## Modified results")
    df_modified = gr.DataFrame(interactive=True)
    # Export the modified table through the same df_to_csv callback
    download_modified_btn.click(
        fn=df_to_csv,
        inputs=[df_modified],
        outputs=gr.File(visible=False),
        show_progress=False,
    )
    # gr.Markdown("## Extract as CSV")
    # Buttons
    # Upload -> process_files -> raw results table
    upload_btn.upload(fn=process_files, inputs=upload_btn, outputs=df)
    # "Modify raw data" passes the raw table through update_dataframe into the second table
    update_btn.click(fn=update_dataframe, inputs=df, outputs=df_modified)


# Launch the app only when this file is run as a script (not when imported).
if __name__ == "__main__":
    interface.launch(debug=True)