seu-ebers committed
Commit 9c94ced · 1 Parent(s): 8ec71b6
Files changed (4)
  1. app.py +25 -0
  2. notebook.ipynb +158 -0
  3. old.py +53 -0
  4. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,25 @@
+ import streamlit as st
+ from PIL import Image
+ from transformers import pipeline
+
+ # Binary hot dog / not hot dog classifier from the Hugging Face Hub
+ classifier = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
+
+ st.title("Hot Dog? Or Not?")
+ file_name = st.file_uploader("Upload a hot dog candidate image")
+
+ if file_name is not None:
+     col1, col2 = st.columns(2)
+
+     image = Image.open(file_name)
+     col1.image(image, use_column_width=True)
+     predictions = classifier(image)
+
+     # Show each label with its probability next to the uploaded image
+     col2.header("Probabilities")
+     for p in predictions:
+         col2.subheader(f"{p['label']}: {round(p['score'] * 100, 1)}%")
notebook.ipynb ADDED
@@ -0,0 +1,158 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "initial_id",
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": [
+ "# Load Image to Text model\n",
+ "from transformers import AutoProcessor, AutoModelForCausalLM\n",
+ "import requests\n",
+ "\n",
+ "image_processor = AutoProcessor.from_pretrained(\"microsoft/git-base\")\n",
+ "image_to_text_model = AutoModelForCausalLM.from_pretrained(\"microsoft/git-base\")"
+ ]
+ },
+ {
+ "metadata": {},
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "# Load Translation model (English -> Portuguese)\n",
+ "# NOTE: swapped to mBART-50 here: translate() below uses\n",
+ "# tokenizer.lang_code_to_id['pt_XX'], which the original google-t5/t5-small\n",
+ "# tokenizer does not provide.\n",
+ "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
+ "\n",
+ "tokenizer = AutoTokenizer.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")\n",
+ "translation_model = AutoModelForSeq2SeqLM.from_pretrained(\"facebook/mbart-large-50-many-to-many-mmt\")"
+ ],
+ "id": "be52bb44374be3a1"
+ },
+ {
+ "metadata": {},
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "def generate_caption(image):\n",
+ "    pixel_values = image_processor(images=image, return_tensors=\"pt\").pixel_values\n",
+ "    generated_ids = image_to_text_model.generate(pixel_values=pixel_values, max_length=200)\n",
+ "    generated_caption = image_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
+ "\n",
+ "    return generated_caption"
+ ],
+ "id": "eb994d7ef0dc73f6"
+ },
+ {
+ "metadata": {},
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "import torch\n",
+ "\n",
+ "def translate(text):\n",
+ "    tokenizer.src_lang = 'en_XX'  # the captions produced above are English\n",
+ "    inputs = tokenizer(text, return_tensors='pt')\n",
+ "    input_ids = inputs.input_ids\n",
+ "    attention_mask = inputs.attention_mask\n",
+ "\n",
+ "    if torch.cuda.is_available():\n",
+ "        input_ids = input_ids.to('cuda')\n",
+ "        attention_mask = attention_mask.to('cuda')\n",
+ "        model = translation_model.to('cuda')\n",
+ "    else:\n",
+ "        print('No NVIDIA GPU available, translation will run on the CPU')\n",
+ "        model = translation_model\n",
+ "\n",
+ "    # Force Portuguese as the target language\n",
+ "    output = model.generate(input_ids, attention_mask=attention_mask, forced_bos_token_id=tokenizer.lang_code_to_id['pt_XX'])\n",
+ "    translated = tokenizer.decode(output[0], skip_special_tokens=True)\n",
+ "\n",
+ "    return translated"
+ ],
+ "id": "f9742a337b32cc1"
+ },
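+ {
+ "metadata": {},
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "# Added sanity check (not in the original notebook): translate a fixed\n",
+ "# English sentence before running the full caption -> translate flow.\n",
+ "print(translate('two cats are sleeping on a pink couch'))"
+ ],
+ "id": "added_sanity_check_cell"
+ },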
+ {
+ "metadata": {},
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "# Load a test image from a URL (swap in a commented URL to try other images)\n",
+ "from PIL import Image\n",
+ "\n",
+ "img_url = 'http://images.cocodataset.org/val2017/000000039769.jpg'\n",
+ "# img_url = 'https://farm4.staticflickr.com/3733/9000662079_ce3599d0d8_z.jpg'\n",
+ "# img_url = 'https://farm4.staticflickr.com/3088/5793281956_2a15b2559c_z.jpg'\n",
+ "# img_url = 'https://farm5.staticflickr.com/4073/4816939054_844feb0078_z.jpg'\n",
+ "\n",
+ "image = Image.open(requests.get(img_url, stream=True).raw)"
+ ],
+ "id": "97f3e60bca81b195"
+ },
+ {
+ "metadata": {},
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "caption = generate_caption(image)\n",
+ "\n",
+ "print(caption)"
+ ],
+ "id": "1a4c1ed0fc31fd60"
+ },
+ {
+ "metadata": {},
+ "cell_type": "code",
+ "outputs": [],
+ "execution_count": null,
+ "source": [
+ "translated_caption = translate(caption)\n",
+ "\n",
+ "print(translated_caption)"
+ ],
+ "id": "a4d4f92f2c0b3922"
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }
old.py ADDED
@@ -0,0 +1,53 @@
+ import requests
+ import torch
+ import streamlit as st
+ from transformers import pipeline, AutoProcessor, LlavaForConditionalGeneration
+ from PIL import Image
+
+ classifier = pipeline(task="image-classification", model="julien-c/hotdog-not-hotdog")
+ # processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
+ # model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")
+
+ st.title("Hot Dog? Or Not?")
+ file_name = st.file_uploader("Upload a hot dog candidate image")
+
+ if file_name is not None:
+     col1, col2 = st.columns(2)
+
+     image = Image.open(file_name)
+     col1.image(image, use_column_width=True)
+     predictions = classifier(image)
+
+     col2.header("Probabilities")
+     for p in predictions:
+         col2.subheader(f"{p['label']}: {round(p['score'] * 100, 1)}%")
+
+ # img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
+ # raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
+ #
+ # question = "how many dogs are in the picture?"
+ # inputs = processor(raw_image, question, return_tensors="pt")
+ #
+ # out = model.generate(**inputs)
+ # print(processor.decode(out[0], skip_special_tokens=True).strip())
+
+ #
+ # model_id = "llava-hf/llava-1.5-7b-hf"
+ #
+ # prompt = "USER: <image>\nWhat are these?\nASSISTANT:"
+ # image_file = "http://images.cocodataset.org/val2017/000000039769.jpg"
+ #
+ # model = LlavaForConditionalGeneration.from_pretrained(
+ #     model_id,
+ #     torch_dtype=torch.float16,
+ #     low_cpu_mem_usage=True,
+ # ).to(0)
+ #
+ # processor = AutoProcessor.from_pretrained(model_id)
+ #
+ #
+ # raw_image = Image.open(requests.get(image_file, stream=True).raw)
+ # inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
+ #
+ # output = model.generate(**inputs, max_new_tokens=200, do_sample=False)
+ # print(processor.decode(output[0][2:], skip_special_tokens=True))
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ transformers
+ torch
+ accelerate
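+ # Assumed additions (not in the original commit): app.py imports streamlit and
+ # PIL directly; Streamlit Spaces preinstall streamlit, but local runs need both.
+ streamlit
+ Pillow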