<!--
  File: Qwen3.5-9B-NSC-ACE-SABER-GGUF / assets / acta_agentic_gains.svg
  Shown on the hosting page next to the commit: DJLougen
  Commit message: Add NSC-ACE benchmark plots
  Commit: 6b02fc9 (verified)
-->
<svg xmlns="http://www.w3.org/2000/svg" width="1100" height="650" viewBox="0 0 1100 650" role="img" aria-labelledby="title desc">
<!-- Accessible name and description; the root element's aria-labelledby="title desc" points at these two ids. -->
<title id="title">Held-out Acta agentic benchmark improvements</title>
<desc id="desc">Grouped horizontal bar chart comparing base Qwen3.5-9B and Qwen3.5-9B NSC-ACE on held-out Acta structural agentic metrics.</desc>
<defs>
  <!-- Horizontal (left-to-right) slate gradient: base-model bars and legend swatch. -->
  <linearGradient id="baseGrad" x1="0" x2="1" y1="0" y2="0">
    <stop offset="0%" stop-color="#94a3b8"/>
    <stop offset="100%" stop-color="#64748b"/>
  </linearGradient>
  <!-- Horizontal (left-to-right) green-to-teal gradient: NSC-ACE bars and legend swatch. -->
  <linearGradient id="aceGrad" x1="0" x2="1" y1="0" y2="0">
    <stop offset="0%" stop-color="#22c55e"/>
    <stop offset="100%" stop-color="#0f766e"/>
  </linearGradient>
  <!--
    Soft drop shadow applied to every bar.
    The filter region is expressed relative to the filtered element's bounding box.
    The bars are 16px tall and the shadow (dy=2, stdDeviation=2) reaches roughly
    8px below them, so the region extends 50% (8px) above and below the bar;
    a smaller region would clip the lower edge of the blur.
  -->
  <filter id="shadow" x="-10%" y="-50%" width="120%" height="200%">
    <feDropShadow dx="0" dy="2" stdDeviation="2" flood-color="#0f172a" flood-opacity="0.18"/>
  </filter>
</defs>
<!-- Full-canvas page background with rounded corners. -->
<rect fill="#f8fafc" rx="18" width="1100" height="650"/>
<!-- White card panel inset 28px from every edge, with a thin border. -->
<rect fill="#ffffff" stroke="#e2e8f0" rx="16" x="28" y="28" width="1044" height="594"/>
<!-- Chart heading. -->
<text fill="#0f172a" font-family="Inter, Segoe UI, Arial, sans-serif" font-size="30" font-weight="800" x="56" y="76">Held-out Acta Agentic Structural Eval</text>
<!-- Subtitle line under the heading. -->
<text fill="#475569" font-family="Inter, Segoe UI, Arial, sans-serif" font-size="16" x="56" y="108">80 held-out prompts. Greedy decoding. Higher is better.</text>
<g fill="#475569" font-size="14" font-family="Inter, Segoe UI, Arial, sans-serif">
  <!-- Legend: each swatch reuses the gradient of the bar series it names. -->
  <rect fill="url(#baseGrad)" rx="3" x="765" y="64" width="20" height="12"/>
  <text x="794" y="75">Qwen3.5-9B base</text>
  <rect fill="url(#aceGrad)" rx="3" x="765" y="90" width="20" height="12"/>
  <text x="794" y="101">NSC-ACE adapter</text>
</g>
<g stroke-width="1" stroke="#e2e8f0">
  <!-- Five vertical gridlines, 145px apart, from x=300 to x=880, spanning y=145 to y=570. -->
  <line x1="300" x2="300" y1="145" y2="570"/>
  <line x1="445" x2="445" y1="145" y2="570"/>
  <line x1="590" x2="590" y1="145" y2="570"/>
  <line x1="735" x2="735" y1="145" y2="570"/>
  <line x1="880" x2="880" y1="145" y2="570"/>
</g>
<!--
  X-axis tick labels. Each label is centred on its gridline (x=300, 445, 590,
  735, 880) via the inherited text-anchor="middle", so the alignment holds
  whichever font in the font-family list is actually used.
-->
<g font-family="Inter, Segoe UI, Arial, sans-serif" font-size="12" fill="#64748b" text-anchor="middle">
  <text x="300" y="590">0</text>
  <text x="445" y="590">25%</text>
  <text x="590" y="590">50%</text>
  <text x="735" y="590">75%</text>
  <text x="880" y="590">100%</text>
</g>
<g font-family="Inter, Segoe UI, Arial, sans-serif">
<g transform="translate(0,150)">
  <!-- Row layout: metric name at the left, base bar above the NSC-ACE bar (both start at x=300), a value label after each bar, and the point gain at x=930. -->
  <text fill="#0f172a" font-weight="700" font-size="15" x="56" y="23">Composite structural score</text>
  <rect fill="url(#baseGrad)" filter="url(#shadow)" rx="8" x="300" y="5" width="466" height="16"/>
  <rect fill="url(#aceGrad)" filter="url(#shadow)" rx="8" x="300" y="27" width="549" height="16"/>
  <text fill="#334155" font-size="13" x="779" y="18">80.4%</text>
  <text fill="#065f46" font-weight="700" font-size="13" x="862" y="40">94.7%</text>
  <text fill="#0f766e" font-weight="700" font-size="13" x="930" y="31">+14.3 pts</text>
</g>
<g transform="translate(0,210)">
  <!-- Row layout: metric name at the left, base bar above the NSC-ACE bar (both start at x=300), a value label after each bar, and the point gain at x=930. -->
  <text fill="#0f172a" font-weight="700" font-size="15" x="56" y="23">Format reward</text>
  <rect fill="url(#baseGrad)" filter="url(#shadow)" rx="8" x="300" y="5" width="409" height="16"/>
  <rect fill="url(#aceGrad)" filter="url(#shadow)" rx="8" x="300" y="27" width="548" height="16"/>
  <text fill="#334155" font-size="13" x="722" y="18">70.6%</text>
  <text fill="#065f46" font-weight="700" font-size="13" x="861" y="40">94.4%</text>
  <text fill="#0f766e" font-weight="700" font-size="13" x="930" y="31">+23.8 pts</text>
</g>
<g transform="translate(0,270)">
  <!-- Row layout: metric name at the left, base bar above the NSC-ACE bar (both start at x=300), a value label after each bar, and the point gain at x=930. -->
  <text fill="#0f172a" font-weight="700" font-size="15" x="56" y="23">Tool-call rate</text>
  <rect fill="url(#baseGrad)" filter="url(#shadow)" rx="8" x="300" y="5" width="479" height="16"/>
  <rect fill="url(#aceGrad)" filter="url(#shadow)" rx="8" x="300" y="27" width="566" height="16"/>
  <text fill="#334155" font-size="13" x="792" y="18">82.5%</text>
  <text fill="#065f46" font-weight="700" font-size="13" x="879" y="40">97.5%</text>
  <text fill="#0f766e" font-weight="700" font-size="13" x="930" y="31">+15.0 pts</text>
</g>
<g transform="translate(0,330)">
  <!-- Row layout: metric name at the left, base bar above the NSC-ACE bar (both start at x=300), a value label after each bar, and the point gain at x=930. -->
  <text fill="#0f172a" font-weight="700" font-size="15" x="56" y="23">1-2 call sweet spot</text>
  <rect fill="url(#baseGrad)" filter="url(#shadow)" rx="8" x="300" y="5" width="457" height="16"/>
  <rect fill="url(#aceGrad)" filter="url(#shadow)" rx="8" x="300" y="27" width="551" height="16"/>
  <text fill="#334155" font-size="13" x="770" y="18">78.8%</text>
  <text fill="#065f46" font-weight="700" font-size="13" x="864" y="40">95.0%</text>
  <text fill="#0f766e" font-weight="700" font-size="13" x="930" y="31">+16.2 pts</text>
</g>
<g transform="translate(0,390)">
  <!-- Row layout: metric name at the left, base bar above the NSC-ACE bar (both start at x=300), a value label after each bar, and the point gain at x=930. -->
  <text fill="#0f172a" font-weight="700" font-size="15" x="56" y="23">Reasoning tag rate</text>
  <rect fill="url(#baseGrad)" filter="url(#shadow)" rx="8" x="300" y="5" width="276" height="16"/>
  <rect fill="url(#aceGrad)" filter="url(#shadow)" rx="8" x="300" y="27" width="566" height="16"/>
  <text fill="#334155" font-size="13" x="589" y="18">47.5%</text>
  <text fill="#065f46" font-weight="700" font-size="13" x="879" y="40">97.5%</text>
  <text fill="#0f766e" font-weight="700" font-size="13" x="930" y="31">+50.0 pts</text>
</g>
<g transform="translate(0,450)">
  <!-- Row layout: metric name at the left, base bar above the NSC-ACE bar (both start at x=300), a value label after each bar, and the point gain at x=930. -->
  <text fill="#0f172a" font-weight="700" font-size="15" x="56" y="23">Tool-use reward</text>
  <rect fill="url(#baseGrad)" filter="url(#shadow)" rx="8" x="300" y="5" width="479" height="16"/>
  <rect fill="url(#aceGrad)" filter="url(#shadow)" rx="8" x="300" y="27" width="560" height="16"/>
  <text fill="#334155" font-size="13" x="792" y="18">82.6%</text>
  <text fill="#065f46" font-weight="700" font-size="13" x="873" y="40">96.5%</text>
  <text fill="#0f766e" font-weight="700" font-size="13" x="930" y="31">+13.9 pts</text>
</g>
<g transform="translate(0,510)">
  <!-- Row layout: metric name at the left, base bar above the NSC-ACE bar (both start at x=300), a value label after each bar, and the point gain at x=930. -->
  <!-- NOTE(review): the displayed values differ by 12.0 (96.5 vs 84.5) while the gain label reads +12.1; presumably the gain was computed from unrounded scores. Confirm against the source data. -->
  <text fill="#0f172a" font-weight="700" font-size="15" x="56" y="23">Reasoning-depth reward</text>
  <rect fill="url(#baseGrad)" filter="url(#shadow)" rx="8" x="300" y="5" width="490" height="16"/>
  <rect fill="url(#aceGrad)" filter="url(#shadow)" rx="8" x="300" y="27" width="560" height="16"/>
  <text fill="#334155" font-size="13" x="803" y="18">84.5%</text>
  <text fill="#065f46" font-weight="700" font-size="13" x="873" y="40">96.5%</text>
  <text fill="#0f766e" font-weight="700" font-size="13" x="930" y="31">+12.1 pts</text>
</g>
</g>
</svg>