Spaces:
Runtime error
Runtime error
{ | |
"time": "241031154353", | |
"results": { | |
"Claude3-Sonnet": { | |
"META": { | |
"Method": [ | |
"Claude3-Sonnet", | |
"https://aws.amazon.com/bedrock/claude/" | |
], | |
"Parameters": "", | |
"Org": "Anthropic", | |
"OpenSource": "No", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 80.75 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 71.63 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 70.17 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 67.76 | |
} | |
}, | |
"Claude2": { | |
"META": { | |
"Method": [ | |
"Claude2", | |
"https://aws.amazon.com/bedrock/claude/" | |
], | |
"Parameters": "", | |
"Org": "Anthropic", | |
"OpenSource": "No", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 75.46 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 65.5 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 63.53 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 65.24 | |
} | |
}, | |
"ChatGPT": { | |
"META": { | |
"Method": [ | |
"ChatGPT", | |
"https://platform.openai.com/docs/models#gpt-3-5-turbo" | |
], | |
"Parameters": "", | |
"Org": "OpenAI", | |
"OpenSource": "No", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 75.63 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 64.97 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 59.79 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 60.81 | |
} | |
}, | |
"LLaMA3-70B-Instruct": { | |
"META": { | |
"Method": [ | |
"LLaMA3-70B-Instruct", | |
"https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct" | |
], | |
"Parameters": "70B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 75.24 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 69.29 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 67.67 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 62.0 | |
} | |
}, | |
"QWen1.5-72B": { | |
"META": { | |
"Method": [ | |
"QWen1.5-72B", | |
"https://huggingface.co/Qwen/Qwen1.5-72B" | |
], | |
"Parameters": "72B", | |
"Org": "Alibaba", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 71.67 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 68.92 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 64.12 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 64.84 | |
} | |
}, | |
"LLaMA3-70B": { | |
"META": { | |
"Method": [ | |
"LLaMA3-70B", | |
"https://huggingface.co/meta-llama/Meta-Llama-3-70B" | |
], | |
"Parameters": "70B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 69.59 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 63.56 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 55.77 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 58.95 | |
} | |
}, | |
"LLaMA2-70B-Chat": { | |
"META": { | |
"Method": [ | |
"LLaMA2-70B-Chat", | |
"https://huggingface.co/meta-llama/Llama-2-70b-chat-hf" | |
], | |
"Parameters": "70B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 61.84 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 40.73 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 44.2 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 47.04 | |
} | |
}, | |
"LLaMA2-70B": { | |
"META": { | |
"Method": [ | |
"LLaMA2-70B", | |
"https://huggingface.co/meta-llama/Llama-2-70b-hf" | |
], | |
"Parameters": "70B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 61.05 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 55.87 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 43.24 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 47.85 | |
} | |
}, | |
"Mixtral-8x7B": { | |
"META": { | |
"Method": [ | |
"Mixtral-8x7B", | |
"https://huggingface.co/mistralai/Mixtral-8x7B-v0.1" | |
], | |
"Parameters": "46.7B", | |
"Org": "MistralAI", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 59.43 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 54.32 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 55.31 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 44.69 | |
} | |
}, | |
"QWen1.5-14B": { | |
"META": { | |
"Method": [ | |
"QWen1.5-14B", | |
"https://huggingface.co/Qwen/Qwen1.5-14B" | |
], | |
"Parameters": "14B", | |
"Org": "Alibaba", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 67.22 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 60.92 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 54.92 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 55.21 | |
} | |
}, | |
"eCeLLM-L": { | |
"META": { | |
"Method": [ | |
"eCeLLM-L", | |
"https://huggingface.co/NingLab/eCeLLM-L" | |
], | |
"Parameters": "13B", | |
"Org": "OSU NingLab", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 61.54 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 54.84 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 54.55 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 59.64 | |
} | |
}, | |
"Vicuna-13B-v1.5": { | |
"META": { | |
"Method": [ | |
"Vicuna-13B-v1.5", | |
"https://huggingface.co/lmsys/vicuna-13b-v1.5" | |
], | |
"Parameters": "13B", | |
"Org": "LMSys", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 59.64 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 52.63 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 49.81 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 49.64 | |
} | |
}, | |
"LLaMA2-13B-Chat": { | |
"META": { | |
"Method": [ | |
"LLaMA2-13B-Chat", | |
"https://huggingface.co/meta-llama/Llama-2-13b-chat-hf" | |
], | |
"Parameters": "13B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 51.79 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 45.01 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 39.95 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 42.99 | |
} | |
}, | |
"LLaMA2-13B": { | |
"META": { | |
"Method": [ | |
"LLaMA2-13B", | |
"https://huggingface.co/meta-llama/Llama-2-13b-hf" | |
], | |
"Parameters": "13B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 45.86 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 39.47 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 39.43 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 44.23 | |
} | |
}, | |
"LLaMA3-8B-Instruct": { | |
"META": { | |
"Method": [ | |
"LLaMA3-8B-Instruct", | |
"https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct" | |
], | |
"Parameters": "8B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 65.26 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 56.84 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 54.88 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 55.37 | |
} | |
}, | |
"LLaMA3-8B": { | |
"META": { | |
"Method": [ | |
"LLaMA3-8B", | |
"https://huggingface.co/meta-llama/Meta-Llama-3-8B" | |
], | |
"Parameters": "8B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 58.02 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 49.74 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 44.16 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 51.03 | |
} | |
}, | |
"QWen1.5-7B": { | |
"META": { | |
"Method": [ | |
"QWen1.5-7B", | |
"https://huggingface.co/Qwen/Qwen1.5-7B" | |
], | |
"Parameters": "7B", | |
"Org": "Alibaba", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 58.89 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 52.34 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 49.81 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 50.14 | |
} | |
}, | |
"eCeLLM-M": { | |
"META": { | |
"Method": [ | |
"eCeLLM-M", | |
"https://huggingface.co/NingLab/eCeLLM-M" | |
], | |
"Parameters": "7B", | |
"Org": "OSU NingLab", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 63.29 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 48.94 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 53.78 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 56.08 | |
} | |
}, | |
"Zephyr-Beta": { | |
"META": { | |
"Method": [ | |
"Zephyr-Beta", | |
"https://huggingface.co/HuggingFaceH4/zephyr-7b-beta" | |
], | |
"Parameters": "7B", | |
"Org": "HuggingFace H4", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 61.65 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 52.57 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 44.73 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 45.35 | |
} | |
}, | |
"Mistral-7B-Instruct": { | |
"META": { | |
"Method": [ | |
"Mistral-7B-Instruct", | |
"https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2" | |
], | |
"Parameters": "7B", | |
"Org": "MistralAI", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 62.03 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 46.36 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 42.21 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 43.32 | |
} | |
}, | |
"Mistral-7B": { | |
"META": { | |
"Method": [ | |
"Mistral-7B", | |
"https://huggingface.co/mistralai/Mistral-7B-v0.1" | |
], | |
"Parameters": "7B", | |
"Org": "MistralAI", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 55.82 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 46.69 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 46.27 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 41.47 | |
} | |
}, | |
"Vicuna-7B-v1.5": { | |
"META": { | |
"Method": [ | |
"Vicuna-7B-v1.5", | |
"https://huggingface.co/lmsys/vicuna-7b-v1.5" | |
], | |
"Parameters": "7B", | |
"Org": "LMSys", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 53.46 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 45.06 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 41.11 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 43.82 | |
} | |
}, | |
"LLaMA2-7B-Chat": { | |
"META": { | |
"Method": [ | |
"LLaMA2-7B-Chat", | |
"https://huggingface.co/meta-llama/Llama-2-7b-chat-hf" | |
], | |
"Parameters": "7B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 51.67 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 43.48 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 41.42 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 40.43 | |
} | |
}, | |
"LLaMA2-7B": { | |
"META": { | |
"Method": [ | |
"LLaMA2-7B", | |
"https://huggingface.co/meta-llama/Llama-2-7b-hf" | |
], | |
"Parameters": "7B", | |
"Org": "Meta", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 38.22 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 32.81 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 32.56 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 27.71 | |
} | |
}, | |
"QWen1.5-4B": { | |
"META": { | |
"Method": [ | |
"QWen1.5-4B", | |
"https://huggingface.co/Qwen/Qwen1.5-4B" | |
], | |
"Parameters": "4B", | |
"Org": "Alibaba", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 57.21 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 52.56 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 42.74 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 49.78 | |
} | |
}, | |
"Phi-2": { | |
"META": { | |
"Method": [ | |
"Phi-2", | |
"https://huggingface.co/microsoft/phi-2" | |
], | |
"Parameters": "2.8B", | |
"Org": "Microsoft", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 49.34 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 42.83 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 36.38 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 32.91 | |
} | |
}, | |
"eCeLLM-S": { | |
"META": { | |
"Method": [ | |
"eCeLLM-S", | |
"https://huggingface.co/NingLab/eCeLLM-S" | |
], | |
"Parameters": "2.8B", | |
"Org": "OSU NingLab", | |
"OpenSource": "Yes", | |
"Verified": "Yes" | |
}, | |
"Shopping Concept Understanding": { | |
"Overall": 49.4 | |
}, | |
"Shopping Knowledge Reasoning": { | |
"Overall": 39.06 | |
}, | |
"User Behavior Alignment": { | |
"Overall": 36.33 | |
}, | |
"Multi-lingual Abilities": { | |
"Overall": 32.79 | |
} | |
} | |
} | |
} |