Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -50,6 +50,13 @@ def llm_response(history,text,img):
|
|
50 |
def sentence_builder(animal, place):
|
51 |
return f"""how many {animal}s from the {place} are shown in the picture?"""
|
52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
# gradio block
|
54 |
|
55 |
with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
|
@@ -83,6 +90,7 @@ with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
|
|
83 |
|
84 |
with gr.Blocks(theme='snehilsanyal/scikit-learn') as app2:
|
85 |
gr.Markdown("## MM 2 ##")
|
|
|
86 |
with gr.Row():
|
87 |
image_box = gr.Image(type="filepath")
|
88 |
|
|
|
50 |
def sentence_builder(animal, place):
|
51 |
return f"""how many {animal}s from the {place} are shown in the picture?"""
|
52 |
|
53 |
+
descript1 = gr.Markdown("""
|
54 |
+
## Multimodal Descript ##
|
55 |
+
|
56 |
+
<h5 align="center"><i>"Imagine learning XXXX."</i></h5>
|
57 |
+
|
58 |
+
Multimodal-CoT incorporates vision features in a decoupled training framework. The framework consists of two training stages: (i) rationale generation and (ii) answer inference. Both stages share the same model architecture but differ in the input and output.
|
59 |
+
""")
|
60 |
# gradio block
|
61 |
|
62 |
with gr.Blocks(theme='snehilsanyal/scikit-learn') as app1:
|
|
|
90 |
|
91 |
with gr.Blocks(theme='snehilsanyal/scikit-learn') as app2:
|
92 |
gr.Markdown("## MM 2 ##")
|
93 |
+
description = descript1
|
94 |
with gr.Row():
|
95 |
image_box = gr.Image(type="filepath")
|
96 |
|