myyim committed (verified)
Commit d03234f · 1 parent: 907d320

Upload 3 files

Files changed (3):
  1. README.md +15 -7
  2. app.py +114 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,14 +1,22 @@
 ---
-title: Vlm Google Paligemma2 3b
-emoji: 🌖
-colorFrom: gray
-colorTo: yellow
+title: Vlm Paligemma2 3B
+emoji: 🐠
+colorFrom: green
+colorTo: pink
 sdk: streamlit
-sdk_version: 1.42.0
+sdk_version: 1.41.1
 app_file: app.py
 pinned: false
 license: apache-2.0
-short_description: Streamlit UI running Google Paligemma2 3B VLM
+short_description: Running the VLM PaliGemma 2 3B
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+If you are interested in how to create this app, the following two articles will be useful.
+
+Create an App with Streamlit on Hugging Face Spaces to Showcase your AI/ML Projects
+
+https://medium.com/p/4edd8f30d542
+
+Create Hugging Face Spaces to Showcase your AI/ML Projects: A Step-by-Step Guide
+
+https://medium.com/p/11cd1b4463fc
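The YAML block between the --- markers is the Space's configuration: sdk and sdk_version select the Streamlit runtime (pinned here to 1.41.1), app_file names the entry point, and the title/emoji/color fields control how the Space card is rendered. The full field list is in the configuration reference that the old README linked.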
app.py ADDED
@@ -0,0 +1,114 @@
### import packages
import torch
from transformers import (
    PaliGemmaProcessor,
    PaliGemmaForConditionalGeneration,
)
import streamlit as st
from PIL import Image
import os

### access token: store HF_TOKEN in the Space's secrets
token = os.environ.get('HF_TOKEN')

### choose a paligemma model
# See https://huggingface.co/collections/google/paligemma-2-release-67500e1e1dbfdd4dee27ba48
model_id = "google/paligemma2-3b-pt-896"

@st.cache_resource
def model_setup(model_id):
    """
    Sets up the model and processor; @st.cache_resource caches the result across reruns.

    Args:
        model_id: one of the paligemma models

    Returns:
        model: from PaliGemmaForConditionalGeneration.from_pretrained
        processor: from PaliGemmaProcessor.from_pretrained
    """
    model = PaliGemmaForConditionalGeneration.from_pretrained(
        model_id, torch_dtype=torch.bfloat16, device_map="auto", token=token
    ).eval()
    processor = PaliGemmaProcessor.from_pretrained(model_id, token=token)
    return model, processor

def run_model(prompt, image):
    """
    Performs inference on the user's prompt and image.

    Args:
        prompt: user prompt or task
        image: user's uploaded image

    Returns:
        output text
    """
    model_inputs = processor(text=prompt, images=image, return_tensors="pt").to(torch.bfloat16).to(model.device)
    input_len = model_inputs["input_ids"].shape[-1]
    with torch.inference_mode():
        generation = model.generate(**model_inputs, max_new_tokens=1000, do_sample=False)
        generation = generation[0][input_len:]
    return processor.decode(generation, skip_special_tokens=True)

def initialize():
    """
    Initializes chat history.
    """
    st.session_state.messages = []

### load model
model, processor = model_setup(model_id)

### upload a file
uploaded_file = st.file_uploader("Choose an image", on_change=initialize)

if uploaded_file:
    st.image(uploaded_file)
    image = Image.open(uploaded_file).convert("RGB")

    # tasks: Caption by default; accept a free-form prompt only when selected
    task = st.radio(
        "Task",
        ('Caption', 'OCR', 'Segment', 'Enter your prompt'),
        horizontal=True)

    # display chat messages from history on app rerun
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if task == 'Enter your prompt':
        if prompt := st.chat_input("Type here!", key="user_prompt"):
            # display user message in chat message container
            with st.chat_message("user"):
                st.markdown(prompt)

            # add user message to chat history
            st.session_state.messages.append({"role": "user", "content": prompt})

            # run the VLM
            response = run_model(prompt, image)

            # display assistant response in chat message container
            with st.chat_message("assistant"):
                st.markdown(response)

            # add assistant response to chat history
            st.session_state.messages.append({"role": "assistant", "content": response})
    else:
        # display user message in chat message container
        with st.chat_message("user"):
            st.markdown(task)

        # add user message to chat history
        st.session_state.messages.append({"role": "user", "content": task})

        # run the VLM
        response = run_model(task, image)

        # display assistant response in chat message container
        with st.chat_message("assistant"):
            st.markdown(response)

        # add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": response})
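For a quick sanity check outside the Streamlit UI, the same load-and-generate path can be exercised from a plain script. The sketch below mirrors the calls app.py makes; the "test.jpg" path and the "caption en" prompt are illustrative assumptions, not part of the commit.

### standalone smoke test (a sketch, not part of this commit)
import os
import torch
from PIL import Image
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration

model_id = "google/paligemma2-3b-pt-896"  # same checkpoint as app.py
token = os.environ.get("HF_TOKEN")        # gated model: a token is required

model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto", token=token
).eval()
processor = PaliGemmaProcessor.from_pretrained(model_id, token=token)

# "test.jpg" is a placeholder; any RGB image works
image = Image.open("test.jpg").convert("RGB")
inputs = processor(text="caption en", images=image, return_tensors="pt").to(torch.bfloat16).to(model.device)
input_len = inputs["input_ids"].shape[-1]

with torch.inference_mode():
    out = model.generate(**inputs, max_new_tokens=100, do_sample=False)

# decode only the newly generated tokens, as run_model does
print(processor.decode(out[0][input_len:], skip_special_tokens=True))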
requirements.txt ADDED
@@ -0,0 +1,4 @@
transformers
torch
accelerate
pillow
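Note that accelerate backs the device_map="auto" argument in app.py and pillow provides PIL.Image; streamlit itself comes from the Space's sdk setting, so it is not listed here. None of the four packages is version-pinned, so each build resolves the latest releases.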