Spaces:
Runtime error
Runtime error
| [ | |
| { | |
| "role": "user", | |
| "content": "Give you some pictures e1.jpg, e2.png, e3.jpg, help me count the number of sheep?" | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "image-to-text", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "image": "e1.jpg" | |
| } | |
| }, | |
| { | |
| "task": "object-detection", | |
| "id": 1, | |
| "dep": [-1], | |
| "args": { | |
| "image": "e1.jpg" | |
| } | |
| }, | |
| { | |
| "task": "visual-question-answering", | |
| "id": 2, | |
| "dep": [1], | |
| "args": { | |
| "image": "<GENERATED>-1", | |
| "text": "How many sheep in the picture" | |
| } | |
| }, | |
| { | |
| "task": "image-to-text", | |
| "id": 3, | |
| "dep": [-1], | |
| "args": { | |
| "image": "e2.png" | |
| } | |
| }, | |
| { | |
| "task": "object-detection", | |
| "id": 4, | |
| "dep": [-1], | |
| "args": { | |
| "image": "e2.png" | |
| } | |
| }, | |
| { | |
| "task": "visual-question-answering", | |
| "id": 5, | |
| "dep": [4], | |
| "args": { | |
| "image": "<GENERATED>-4", | |
| "text": "How many sheep in the picture" | |
| } | |
| }, | |
| { | |
| "task": "image-to-text", | |
| "id": 6, | |
| "dep": [-1], | |
| "args": { | |
| "image": "e3.jpg" | |
| } | |
| }, | |
| { | |
| "task": "object-detection", | |
| "id": 7, | |
| "dep": [-1], | |
| "args": { | |
| "image": "e3.jpg" | |
| } | |
| }, | |
| { | |
| "task": "visual-question-answering", | |
| "id": 8, | |
| "dep": [7], | |
| "args": { | |
| "image": "<GENERATED>-7", | |
| "text": "How many sheep in the picture" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "role": "user", | |
| "content": "Look at /e.jpg, can you tell me how many objects in the picture? Give me a picture and video similar to this one." | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "image-to-text", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/e.jpg" | |
| } | |
| }, | |
| { | |
| "task": "object-detection", | |
| "id": 1, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/e.jpg" | |
| } | |
| }, | |
| { | |
| "task": "visual-question-answering", | |
| "id": 2, | |
| "dep": [1], | |
| "args": { | |
| "image": "<GENERATED>-1", | |
| "text": "how many objects in the picture?" | |
| } | |
| }, | |
| { | |
| "task": "text-to-image", | |
| "id": 3, | |
| "dep": [0], | |
| "args": { | |
| "text": "<GENERATED-0>" | |
| } | |
| }, | |
| { | |
| "task": "image-to-image", | |
| "id": 4, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/e.jpg" | |
| } | |
| }, | |
| { | |
| "task": "text-to-video", | |
| "id": 5, | |
| "dep": [0], | |
| "args": { | |
| "text": "<GENERATED-0>" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "role": "user", | |
| "content": "given a document /images/e.jpeg, answer me what is the student amount? And describe the image with your voice" | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "document-question-answering", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/images/e.jpeg", | |
| "text": "what is the student amount?" | |
| } | |
| }, | |
| { | |
| "task": "visual-question-answering", | |
| "id": 1, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/images/e.jpeg", | |
| "text": "what is the student amount?" | |
| } | |
| }, | |
| { | |
| "task": "image-to-text", | |
| "id": 2, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/images/e.jpeg" | |
| } | |
| }, | |
| { | |
| "task": "text-to-speech", | |
| "id": 3, | |
| "dep": [2], | |
| "args": { | |
| "text": "<GENERATED>-2" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "role": "user", | |
| "content": "Given an image /example.jpg, first generate a pose image, then based on the pose generate a new image where a girl is reading a book" | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "openpose-control", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/example.jpg" | |
| } | |
| }, | |
| { | |
| "task": "openpose-text-to-image", | |
| "id": 1, | |
| "dep": [0], | |
| "args": { | |
| "text": "a girl is reading a book", | |
| "image": "<GENERATED>-0" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "role": "user", | |
| "content": "please show me a video and an image of (based on the text) 'a boy is running' and dub it" | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "text-to-video", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "text": "a boy is running" | |
| } | |
| }, | |
| { | |
| "task": "text-to-speech", | |
| "id": 1, | |
| "dep": [-1], | |
| "args": { | |
| "text": "a boy is running" | |
| } | |
| }, | |
| { | |
| "task": "text-to-image", | |
| "id": 2, | |
| "dep": [-1], | |
| "args": { | |
| "text": "a boy is running" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "role": "user", | |
| "content": "please show me a joke and an image of cat" | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "conversational", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "text": "please show me a joke of cat" | |
| } | |
| }, | |
| { | |
| "task": "text-to-image", | |
| "id": 1, | |
| "dep": [-1], | |
| "args": { | |
| "text": "a photo of cat" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "role": "user", | |
| "content": "give me a picture about a cut dog, then describe the image to me and tell a story about it" | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "text-to-image", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "text": "a picture of a cute dog" | |
| } | |
| }, | |
| { | |
| "task": "image-to-text", | |
| "id": 1, | |
| "dep": [0], | |
| "args": { | |
| "image": "<GENERATED>-0" | |
| } | |
| }, | |
| { | |
| "task": "text-generation", | |
| "id": 2, | |
| "dep": [1], | |
| "args": { | |
| "text": "<GENERATED>-1" | |
| } | |
| }, | |
| { | |
| "task": "text-to-speech", | |
| "id": 3, | |
| "dep": [2], | |
| "args": { | |
| "text": "<GENERATED>-2" | |
| } | |
| } | |
| ] | |
| }, | |
| { | |
| "role": "user", | |
| "content": "give you a picture /example.jpg, what's in it and tell me a joke about it" | |
| }, | |
| { | |
| "role": "assistant", | |
| "content": [ | |
| { | |
| "task": "image-to-text", | |
| "id": 0, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/example.jpg" | |
| } | |
| }, | |
| { | |
| "task": "object-detection", | |
| "id": 1, | |
| "dep": [-1], | |
| "args": { | |
| "image": "/example.jpg" | |
| } | |
| }, | |
| { | |
| "task": "conversational", | |
| "id": 2, | |
| "dep": [0], | |
| "args": { | |
| "text": "<GENERATED>-0" | |
| } | |
| }, | |
| { | |
| "task": "text-to-speech", | |
| "id": 3, | |
| "dep": [2], | |
| "args": { | |
| "text": "<GENERATED>-1" | |
| } | |
| } | |
| ] | |
| } | |
| ] |