Akshayram1 commited on
Commit
f9300e0
·
verified ·
1 Parent(s): 6117a0b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoProcessor, AutoModelForImageTextToText
3
+ from PIL import Image
4
+ import torch
5
+
6
+ # Load model and processor
7
+ @st.cache_resource # Cache model to avoid reloading
8
+ def load_model():
9
+ processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
10
+ model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
11
+ return processor, model
12
+
13
+ # Extract text from image using SmolVLM
14
+ def extract_text(image, processor, model):
15
+ # Preprocess image
16
+ inputs = processor(images=image, text="What is the text in this image?", return_tensors="pt")
17
+
18
+ with torch.no_grad():
19
+ outputs = model.generate(**inputs)
20
+
21
+ result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
22
+ return result
23
+
24
+ # Streamlit UI
25
+ def main():
26
+ st.title("🖼️ OCR App using SmolVLM")
27
+ st.write("Upload an image, and I will extract the text for you!")
28
+
29
+ # Load the model and processor
30
+ processor, model = load_model()
31
+
32
+ # File uploader
33
+ uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
34
+
35
+ if uploaded_file is not None:
36
+ # Open image
37
+ image = Image.open(uploaded_file).convert("RGB")
38
+ st.image(image, caption="Uploaded Image", use_column_width=True)
39
+
40
+ # Extract text
41
+ with st.spinner("Extracting text..."):
42
+ extracted_text = extract_text(image, processor, model)
43
+
44
+ # Display result
45
+ st.subheader("📝 Extracted Text:")
46
+ st.write(extracted_text)
47
+
48
+ if __name__ == "__main__":
49
+ main()