Spaces:

Samarth991
/

CV-Agent

Running

App Files Files Community

CV-Agent / tool_utils /clip_segmentation.py

Samarth991

modifided clip.py

7fef6fd 11 days ago

raw

history blame contribute delete

3.3 kB

	import cv2
	from matplotlib import pyplot as plt
	import torch
	import numpy as np
	from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
	from segmentation_mask_overlay import overlay_masks
	from typing import List
	import logging

	class CLIPSEG:
	    """Text-prompted image segmentation built on a CLIPSeg processor/model pair.

	    Given an image path and a list of text prompts, the class thresholds the
	    model's per-prompt predictions, merges them into one binary mask, applies
	    that mask to the (resized) image and writes the result to
	    ``final_mask.png`` in the current working directory.
	    """

	    def __init__(self, model_name="CIDAS/clipseg-rd64-refined", threshould=0.60):
	        """Load the processor and the model, and place the model on the CPU.

	        Args:
	            model_name: Checkpoint identifier handed to ``from_pretrained``.
	            threshould: Cut-off applied to the sigmoid of the logits; pixels
	                strictly above it are marked as belonging to a prompt.
	                (The spelling is kept because it is part of the interface.)
	        """
	        self.clip_processor = CLIPSegProcessor.from_pretrained(model_name)
	        self.clip_model = CLIPSegForImageSegmentation.from_pretrained(model_name)
	        self.threshould = threshould
	        self.clip_model.to('cpu')

	    @staticmethod
	    def create_single_mask(predicted_masks, color=None):
	        """Merge per-prompt masks into a single mask with a bitwise OR.

	        Args:
	            predicted_masks: Sequence of equally shaped integer arrays.
	            color: Unused; accepted only for interface compatibility.

	        Returns:
	            The element-wise OR of all masks, or an all-zero ``uint8`` array
	            of shape ``(352, 352)`` when no mask is supplied.
	        """
	        if len(predicted_masks) == 0:
	            # Fallback canvas for "nothing predicted".
	            return np.zeros(shape=(352, 352), dtype=np.uint8)

	        mask_image = np.zeros_like(predicted_masks[0])
	        for mask in predicted_masks:
	            mask_image = np.bitwise_or(mask_image, mask)
	        return mask_image

	    @staticmethod
	    def create_rgb_mask(mask, color=None):
	        """Replicate a single-channel mask into three identical channels.

	        Args:
	            mask: Single-channel mask array.
	            color: Unused; accepted only for interface compatibility.

	        Returns:
	            A ``uint8`` array holding three copies of ``mask`` as channels,
	            so that AND-ing it with an image keeps the original pixel colours
	            wherever the mask is 255.
	        """
	        three_channel_mask = cv2.merge((mask, mask, mask))
	        return three_channel_mask.astype(np.uint8)

	    def get_segmentation_mask(self, image_path: str, object_prompts: List):
	        """Segment the prompted objects and save the masked image.

	        Args:
	            image_path: Path of the image to read with ``cv2.imread``.
	            object_prompts: Text prompts, one predicted mask per prompt.

	        Returns:
	            ``'Segmentation image created : final_mask.png'`` on success,
	            otherwise ``"unable to create a mask image "``.
	        """
	        failure_message = "unable to create a mask image "

	        if not object_prompts:
	            logging.error("No object prompts supplied for image : %s", image_path)
	            return failure_message

	        bgr_image = cv2.imread(image_path)
	        if bgr_image is None:
	            # cv2.imread does not raise for a missing/unreadable file.
	            logging.error("Unable to read image : %s", image_path)
	            return failure_message
	        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
	        logging.info("objects found out from the image :%s", object_prompts)

	        # The same image is paired with every prompt.
	        inputs = self.clip_processor(
	            text=object_prompts,
	            images=[image] * len(object_prompts),
	            padding="max_length",
	            return_tensors="pt",
	        )
	        with torch.no_grad():  # inference only, no gradients needed
	            outputs = self.clip_model(**inputs)

	        logits = outputs.logits
	        if logits.dim() == 2:
	            # NOTE(review): some transformers releases appear to drop the
	            # batch axis for a single prompt - confirm for the pinned version.
	            logits = logits.unsqueeze(0)

	        probabilities = torch.sigmoid(logits).detach().cpu().numpy()
	        predicted_masks = [
	            np.where(probability > self.threshould, 255, 0).astype(np.uint8)
	            for probability in probabilities
	        ]

	        # NOTE: all prompts share one merged mask; no per-prompt colouring.
	        final_mask = self.create_single_mask(predicted_masks)
	        rgb_predicted_mask = self.create_rgb_mask(final_mask)

	        # Bring the image to the mask resolution; cv2.resize wants (w, h).
	        mask_height, mask_width = final_mask.shape[:2]
	        resize_image = cv2.resize(image, (mask_width, mask_height))
	        rgb_mask_img = cv2.bitwise_and(resize_image, rgb_predicted_mask)

	        try:
	            # The working image is RGB, while cv2.imwrite expects BGR.
	            bgr_mask_img = cv2.cvtColor(rgb_mask_img, cv2.COLOR_RGB2BGR)
	            saved = cv2.imwrite('final_mask.png', bgr_mask_img)
	        except Exception as e:
	            # Best-effort boundary: report the failure instead of raising.
	            logging.error("Error while saving the final mask : %s", e)
	            return failure_message

	        if not saved:
	            # imwrite reports most write failures through its return value.
	            logging.error("Error while saving the final mask : %s", "cv2.imwrite returned False")
	            return failure_message
	        return 'Segmentation image created : final_mask.png'

	if __name__ == "__main__":
	    # Manual smoke run: segment two prompts on the bundled demo image.
	    demo_image_path = "../image_store/demo.jpg"
	    demo_prompts = ['sand', 'dog']
	    segmenter = CLIPSEG()
	    status_message = segmenter.get_segmentation_mask(
	        image_path=demo_image_path,
	        object_prompts=demo_prompts,
	    )