merge_method: slerp
base_model: meta-llama/Llama-3.1-8B-Instruct
dtype: bfloat16
models:
  - model: meta-llama/Llama-3.1-8B-Instruct
  - model: Salesforce/Llama-xLAM-2-8b-fc-r
parameters:
  t:
    # Strongly favor xLAM-2 in Q/K/V projections (for sharper function-calling)
    - filter: self_attn.q_proj
      value: 0.9
    - filter: self_attn.k_proj
      value: 0.9
    - filter: self_attn.v_proj
      value: 0.9
    # Also boost its attention output projection (named o_proj in Llama models)
    - filter: self_attn.o_proj
      value: 0.8
    # Keep the MLP layers more Instruct-leaning
    - filter: mlp
      value: 0.3
    # Give xLAM's LM head extra say for function-call formatting
    - filter: lm_head
      value: 0.7
    # All other params at an even blend
    - value: 0.5
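
For reference, each per-filter `t` value above is the SLERP interpolation factor: `t = 0` keeps the first listed model (Llama-3.1-8B-Instruct), `t = 1` keeps the second (xLAM-2), and intermediate values interpolate along the arc between the two weight tensors. The snippet below is a minimal NumPy sketch of that interpolation for a single tensor, not mergekit's actual implementation; the tensor shapes and the `slerp` helper name are illustrative only.

```python
import numpy as np

def slerp(t: float, a: np.ndarray, b: np.ndarray, eps: float = 1e-8) -> np.ndarray:
    """Spherically interpolate between two weight tensors of the same shape.

    t = 0 returns `a`, t = 1 returns `b`; intermediate t moves along the arc
    between the two (flattened) weight vectors.
    """
    a_flat, b_flat = a.ravel(), b.ravel()
    a_dir = a_flat / (np.linalg.norm(a_flat) + eps)
    b_dir = b_flat / (np.linalg.norm(b_flat) + eps)
    dot = np.clip(np.dot(a_dir, b_dir), -1.0, 1.0)

    # Nearly colinear tensors: fall back to plain linear interpolation.
    if 1.0 - abs(dot) < 1e-6:
        return (1.0 - t) * a + t * b

    theta = np.arccos(dot)              # angle between the two weight vectors
    sin_theta = np.sin(theta)
    w_a = np.sin((1.0 - t) * theta) / sin_theta
    w_b = np.sin(t * theta) / sin_theta
    return w_a * a + w_b * b

# Hypothetical example: blend a q_proj-like weight with t = 0.9,
# i.e. strongly toward the second model, as in the config above.
rng = np.random.default_rng(0)
q_proj_a = rng.standard_normal((512, 512)).astype(np.float32)
q_proj_b = rng.standard_normal((512, 512)).astype(np.float32)
merged_q = slerp(0.9, q_proj_a, q_proj_b)
```

With the config saved to a file, the merge itself is typically run with mergekit's `mergekit-yaml` command pointed at that file and an output directory.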