JunsWan committed on
Commit
375b998
·
verified ·
1 Parent(s): 1e754f8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +23 -47
README.md CHANGED
@@ -1,8 +1,5 @@
1
  ---
2
- title: Zebra Logic Bench
3
- emoji: 🦓
4
- colorFrom: blue
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 4.19.2
8
  app_file: app.py
@@ -13,49 +10,28 @@ api: false
13
  tags:
14
  - leaderboard
15
  datasets:
16
- - allenai/ZebraLogicBench
17
- - WildEval/ZebraLogic
18
  models:
19
- - Qwen/Qwen2-72B-Instruct
20
- - Qwen/Qwen1.5-72B-Chat
21
- - Qwen/Qwen1.5-7B-Chat
22
- - meta-llama/Meta-Llama-3-8B-Instruct
23
- - meta-llama/Meta-Llama-3-70B-Instruct
24
- - meta-llama/Llama-2-13b-chat-hf
25
- - meta-llama/Llama-2-70b-chat-hf
26
- - meta-llama/Llama-2-7b-chat-hf
27
- - mistralai/Mistral-7B-Instruct-v0.1
28
- - mistralai/Mistral-7B-Instruct-v0.2
29
- - mistralai/Mixtral-8x7B-Instruct-v0.1
30
- - microsoft/Phi-3-medium-128k-instruct
31
- - microsoft/Phi-3-mini-128k-instruct
32
- - NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
33
- - NousResearch/Hermes-2-Theta-Llama-3-8B
34
- - 01-ai/Yi-1.5-34B-Chat
35
- - 01-ai/Yi-1.5-9B-Chat
36
- - 01-ai/Yi-1.5-6B-Chat
37
- - google/gemma-7b-it
38
- - google/gemma-2b-it
39
- - allenai/tulu-2-dpo-70b
40
- - HuggingFaceH4/zephyr-7b-beta
41
- - Nexusflow/Starling-LM-7B-beta
42
- - databricks/dbrx-instruct
43
- - princeton-nlp/Llama-3-Instruct-8B-SimPO
44
- - chujiezheng/Llama-3-Instruct-8B-SimPO-ExPO
45
- - chujiezheng/Starling-LM-7B-beta-ExPO
46
- - ZhangShenao/SELM-Zephyr-7B-iter-3
47
- - deepseek-ai/DeepSeek-V2-Chat
48
- - m-a-p/neo_7b_instruct_v0.1
49
- - 01-ai/Yi-34B-chat
50
- - lmsys/vicuna-13b-v1.5
51
- - HuggingFaceH4/zephyr-7b-gemma-v0.1
52
- - deepseek-ai/DeepSeek-Coder-V2
53
- - THUDM/glm-4-9b-chat
54
- - chujiezheng/neo_7b_instruct_v0.1-ExPO
55
- - ZhangShenao/SELM-Llama-3-8B-Instruct-iter-3
56
  ---
57
 
58
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
59
-
60
- Paper: arxiv.org/abs/2406.04770
61
- Paper: arxiv.org/abs/2502.01100
 
1
  ---
2
+ title: HardcoreLogic Bench
 
 
 
3
  sdk: gradio
4
  sdk_version: 4.19.2
5
  app_file: app.py
 
10
  tags:
11
  - leaderboard
12
  datasets:
13
+ - xhWu-fd/HardcoreLogic
 
14
  models:
15
+ - Qwen/Qwen3-8B
16
+ - Qwen/Qwen3-30B-A3B-Thinking-2507
17
+ - Qwen/Qwen3-32B
18
+ - Qwen/Qwen3-Next-80B-A3B-Thinking
19
+ - Qwen/Qwen3-235B-A22B-Thinking-2507
20
+ - MiniMaxAI/MiniMax-M1-40k
21
+ - deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
22
+ - deepseek-ai/DeepSeek-V3.1
23
+ - deepseek-ai/DeepSeek-R1-0528
24
+ - zai-org/GLM-4.5
25
+ - moonshotai/Kimi-K2-Instruct
26
+ - ByteDance-Seed/Seed-OSS-36B-Instruct
27
+ - openai/gpt-oss-120b
28
+ - gpt-5
29
+ - gpt-5-mini
30
+ - o4-mini
31
+ - grok-4
32
+ - gemini-2.5-pro
33
+ - grok-3-mini
34
+ - claude-sonnet-4-thinking
35
+ - gemini-2.5-flash
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ---
37