| {"vocab_size": 262144, "context_length": 32768, "emb_dim": 640, "n_heads": 4, "n_layers": 18, "hidden_dim": 2048, "head_dim": 256, "qk_norm": true, "n_kv_groups": 1, "rope_local_base": 10000.0, "rope_base": 1000000.0, "sliding_window": 512, "layer_types": ["sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention"], "dtype": "torch.bfloat16", "query_pre_attn_scalar": 256} |