Spaces:

devwildlifeai
/

wildlife_watcher_annotation_app_deva

Sleeping

wildlife_watcher_annotation_app_deva / app.py

Deva

handling weird exif element

72431ed 2 months ago

9.08 kB

	"""
	TOC:
	0) IMPORTS
	1) METADATA
	2) UPLOAD
	3) ANNOTATIONS
	-1) MAIN
	"""

	# gradio run.py --demo-name=my_demo

	##################################################
	# 0) IMPORTS
	##################################################

	# baselayer
	import os
	from io import BytesIO
	import argparse

	# web
	import gradio as gr

	# image processing
	from tkinter import Tk, filedialog
	from pathlib import Path
	from PIL import Image, ExifTags
	from PIL.ExifTags import TAGS

	# data science
	import numpy as np
	import pandas as pd

	# export
	import csv


	# from transformers import AutoImageProcessor, AutoModelForImageClassification
	# import torch
	# Load model
	# processor = AutoImageProcessor.from_pretrained("victor/animals-classifier")
	# model = AutoModelForImageClassification.from_pretrained("victor/animals-classifier")
	# model.eval()

	##################################################
	# 1) METADATA
	##################################################


	# this one works with PIL but we don't get all the metadata
	def decode_utf16_little_endian(binary_data):
	    """Decode UTF-16 little-endian bytes into text.

	    Trailing NUL characters are removed from the decoded text. When the
	    input cannot be decoded for any reason, the placeholder string
	    "Encoded" is returned instead.

	    NOTE: a second function with this same name is defined further down
	    in this file; once the module has been executed, that later
	    definition is the one bound to the name.
	    """
	    try:
	        text = binary_data.decode("utf-16-le")
	    except Exception:
	        # Anything that is not decodable bytes ends up here.
	        return "Encoded"
	    return text.rstrip("\x00")


	'''
	def get_exif(list_file_paths):
	metadata_all_file = {}
	df = pd.DataFrame()
	for file_path in list_file_paths:
	metadata = {}
	metadata["name"] = file_path.split("/")[-1]
	print(file_path)
	try:
	image = Image.open(file_path)
	exifdata = image._getexif()
	if exifdata is not None:
	print(len(exifdata.items()))
	for tagid, value in exifdata.items():
	# print(tagid, value)
	# print(f"Value:{value}")
	tagname = str(TAGS.get(tagid, tagid))
	# value = exifdata.get(tagid)
	# Handle binary data
	if isinstance(value, bytes):
	# print(f"Value bytes {value}")
	# print(f"Value bytes {type(value)}")
	# print(f"Value str {decode_utf16_little_endian(value)}")
	value = decode_utf16_little_endian(value)
	print(tagname)
	print(type(tagname))
	print(value)
	if type(tagname) is not str:
	print(">>>>>>>>>>>> here " + type(tagname))
	try:
	metadata[str(tagname)] = value
	except:
	try:
	metadata[repr(tagname)] = value
	except:
	pass
	else:
	metadata[tagname] = value
	"""
	for key in metadata.keys():
	if type(key) is not str:
	try:
	metadata[str(key)] = metadata[key]
	except:
	try:
	metadata[repr(key)] = metadata[key]
	except:
	pass
	del metadata[key]
	"""
	# print(f"\t{metadata}")
	print(metadata)
	print(pd.DataFrame([metadata]))
	df = pd.concat([df, pd.DataFrame([metadata])], ignore_index=True)
	# new_row = {"name": file_path, **metadata}
	# df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
	# metadata_all_file[file_path] = metadata
	else:
	return "No EXIF metadata found."
	except Exception as e:
	return f"Error : {e}"
	print(pd.concat([df, pd.DataFrame([metadata])], ignore_index=True))
	print(f"FINAL DF \n \n \n {df}")
	return df
	'''
	import pandas as pd
	from PIL import Image
	from PIL.ExifTags import TAGS


	def decode_utf16_little_endian(value):
	    """Decode a UTF-16 little-endian byte string into clean text.

	    Args:
	        value: Raw bytes to decode.

	    Returns:
	        The decoded text with surrounding whitespace and NUL characters
	        removed, or ``value`` itself (unchanged) when it cannot be
	        decoded as UTF-16LE.
	    """
	    try:
	        text = value.decode("utf-16-le")
	    except (UnicodeDecodeError, AttributeError):
	        # Invalid UTF-16LE data, or an object without .decode():
	        # fall back to the original value.
	        return value
	    # NUL is not whitespace, so a plain strip() would leave the string
	    # terminators in place; remove both, whichever comes last.
	    return text.strip().strip("\x00").strip()


	def extract_particular_value_from_exif_file(metadata, tagname, value):
	    """Placeholder for tag-specific EXIF handling; currently does nothing.

	    The three arguments are accepted but not used, nothing is modified,
	    and the function returns None.
	    """
	    return None


	def get_exif(list_file_paths):
	    """Collect the EXIF metadata of several images into one DataFrame.

	    Every image that carries EXIF data contributes one row: a ``name``
	    column holding the file's base name plus one column per EXIF tag.
	    Files without EXIF data, and files that fail to process, are
	    reported on stdout and skipped.

	    Args:
	        list_file_paths: Iterable of image file paths (strings).

	    Returns:
	        A pandas DataFrame with one row per successfully read image;
	        an empty DataFrame when no file yielded EXIF data.
	    """
	    rows = []

	    for file_path in list_file_paths:
	        print(file_path)

	        try:
	            # The context manager releases the file handle even when
	            # reading the EXIF block fails.
	            with Image.open(file_path) as image:
	                # NOTE(review): _getexif() is a private Pillow helper and
	                # is not available for every image format; images
	                # without it are reported by the except clause below.
	                exifdata = image._getexif()

	            if exifdata is None:
	                print(f"No EXIF metadata found for {file_path}")
	                continue

	            metadata = {"name": os.path.basename(file_path)}
	            for tagid, value in exifdata.items():
	                # Unknown tag ids fall back to their numeric id; str()
	                # guarantees every column name is a string.
	                tagname = str(TAGS.get(tagid, tagid))
	                if isinstance(value, bytes):
	                    value = decode_utf16_little_endian(value)
	                if isinstance(value, dict):
	                    # Nested tag groups are stored as their string form.
	                    value = str(value)
	                metadata[tagname] = value
	            rows.append(metadata)

	        except Exception as e:
	            # Deliberate best effort: one unreadable file must not abort
	            # the whole batch.
	            print(f"Error processing {file_path}: {e}")

	    # Build the frame once instead of concatenating inside the loop.
	    df = pd.DataFrame(rows)
	    print(f"FINAL DF:\n{df}")
	    return df


	##################################################
	# 2) UPLOAD
	##################################################


	def get_file_names(files_):
	    """Return the ``name`` of every uploaded file.

	    Args:
	        files_: Uploaded files. Each item is either an object exposing a
	            ``name`` attribute or a plain string, which is passed through
	            unchanged. ``None`` or an empty collection means nothing was
	            uploaded.

	    Returns:
	        A list with one entry per file, e.g.
	        ['name of file 1', 'name of file 2']; empty when nothing was
	        uploaded. The value is returned as found, without any splitting.
	    """
	    if not files_:
	        return []
	    # Plain strings have no .name attribute, so they are returned as is.
	    return [getattr(file, "name", file) for file in files_]


	##################################################
	# 3) ANNOTATIONS
	##################################################


	def get_annotation(files_):
	    """
	    Build the metadata table for the uploaded files.

	    No label or accuracy is computed here: the names of the uploaded
	    files are resolved with get_file_names and handed to get_exif, whose
	    result is returned unchanged.

	    Input: Uploaded files
	    Output: the DataFrame returned by get_exif
	    """
	    uploaded_names = get_file_names(files_)
	    return get_exif(uploaded_names)


	def update_dataframe(df):
	    """
	    Return the dataframe that was passed in, untouched.

	    Input: a dataframe
	    Output: the very same object
	    """
	    return df


	def df_to_csv(df_, encodings=None):
	    """
	    Write the dataframe to a CSV file and wrap it in a gradio file output
	    ready for download.

	    The encodings are tried in order; the first one that writes
	    successfully wins.

	    Input:
	        df_: DataFrame to export
	        encodings: optional list of encoding names to try, in order
	    Output: a visible gr.File pointing at the written CSV, or None when
	        the export failed with every encoding
	    """
	    import tempfile

	    if encodings is None:
	        encodings = ["utf-8", "utf-8-sig", "latin1", "iso-8859-1", "cp1252"]

	    # Each call writes into its own fresh directory so that concurrent
	    # exports cannot overwrite one another, while the file keeps the
	    # name "output.csv". The directory is not removed by this function.
	    csv_path = os.path.join(tempfile.mkdtemp(), "output.csv")

	    for encoding in encodings:
	        try:
	            # Only the write is retried; a failure while building the
	            # gr.File below is not something another encoding can fix.
	            df_.to_csv(csv_path, encoding=encoding, index=False)
	        except Exception as e:
	            print(f"Failed with encoding {encoding}: {e}")
	            continue
	        return gr.File(value=csv_path, visible=True)

	    # Every encoding failed: make the "no file" outcome explicit.
	    return None


	##################################################
	# -1) MAIN
	##################################################


	def process_files(files_):
	    """
	    Main function, used as the upload callback.

	    Hands the uploaded files to get_annotation and returns the dataframe
	    it produces, unchanged. No CSV is written here.
	    """
	    return get_annotation(files_)


	# Build the Gradio UI: an upload button, a read-only results table, an
	# editable copy of that table, and buttons that export either table as CSV.
	# NOTE(review): the indentation of this block was lost in this copy, so
	# which widgets are nested inside gr.Row() must be confirmed against the
	# original file.
	with gr.Blocks() as interface:
	gr.Markdown("# Wildlife.ai Annotation tools")
	# Upload data
	with gr.Row():
	upload_btn = gr.UploadButton(
	"Upload raw data",
	file_types=["image", "video"],
	file_count="multiple",
	)
	update_btn = gr.Button("Modify raw data")
	download_raw_btn = gr.Button("Generate raw data as csv")
	download_modified_btn = gr.Button("Generate new data as a csv")
	# Get results
	gr.Markdown("## Results")
	# Read-only table that receives the output of process_files.
	df = gr.DataFrame(interactive=False)
	# Export the read-only table; the initially hidden gr.File output receives
	# whatever df_to_csv returns.
	download_raw_btn.click(
	fn=df_to_csv,
	inputs=[df],
	outputs=gr.File(visible=False),
	)
	gr.Markdown("## Modified results")
	# Editable table, filled from the read-only one via update_btn.
	df_modified = gr.DataFrame(interactive=True)
	# Export the editable table through the same df_to_csv callback.
	download_modified_btn.click(
	fn=df_to_csv,
	inputs=[df_modified],
	outputs=gr.File(visible=False),
	show_progress=False,
	)
	# gr.Markdown("## Extract as CSV")
	# Buttons
	# Uploading files runs process_files and shows its result in the read-only table.
	upload_btn.upload(fn=process_files, inputs=upload_btn, outputs=df)
	# Send the current read-only table through update_dataframe into the editable table.
	update_btn.click(fn=update_dataframe, inputs=df, outputs=df_modified)


	# Launch the app (with debug=True) only when this file is run as a script.
	if __name__ == "__main__":
	interface.launch(debug=True)