<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-CA">
	<id>https://helixprojectai.com:443/wiki/index.php?action=history&amp;feed=atom&amp;title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid</id>
	<title>HMI‑2025‑G4SH — Granite‑4 Small‑Hybrid - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://helixprojectai.com:443/wiki/index.php?action=history&amp;feed=atom&amp;title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid"/>
	<link rel="alternate" type="text/html" href="https://helixprojectai.com:443/wiki/index.php?title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid&amp;action=history"/>
	<updated>2026-06-05T06:13:10Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.41.0</generator>
	<entry>
		<id>https://helixprojectai.com:443/wiki/index.php?title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid&amp;diff=228&amp;oldid=prev</id>
		<title>Steve Helix at 13:41, 13 October 2025</title>
		<link rel="alternate" type="text/html" href="https://helixprojectai.com:443/wiki/index.php?title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid&amp;diff=228&amp;oldid=prev"/>
		<updated>2025-10-13T13:41:56Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;a href=&quot;https://helixprojectai.com:443/wiki/index.php?title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid&amp;amp;diff=228&amp;amp;oldid=227&quot;&gt;Show changes&lt;/a&gt;</summary>
		<author><name>Steve Helix</name></author>
	</entry>
	<entry>
		<id>https://helixprojectai.com:443/wiki/index.php?title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid&amp;diff=227&amp;oldid=prev</id>
		<title>Steve Helix: Created page with &quot; = HMI‑2025‑G4SH — Granite‑4 Small‑Hybrid (Helix Model Integration Sheet) = &lt;blockquote&gt;&#039;&#039;&#039;Status:&#039;&#039;&#039; READY FOR INTEGRATION • &#039;&#039;&#039;Owner:&#039;&#039;&#039; Helix Ops • &#039;&#039;&#039;Last Updated:&#039;&#039;&#039; 2025‑10‑13&lt;/blockquote&gt; ----  == 0) At‑a‑Glance ==  * &#039;&#039;&#039;Model ID (Helix):&#039;&#039;&#039; &lt;code&gt;granite4-small-h&lt;/code&gt; * &#039;&#039;&#039;Upstream Name:&#039;&#039;&#039; &lt;code&gt;ibm/granite4:small-h&lt;/code&gt; (aka &#039;&#039;&#039;Granite 4.0 H‑Small&#039;&#039;&#039;) * &#039;&#039;&#039;Family:&#039;&#039;&#039; IBM Granite 4.0 (hybrid Mamba‑2 + Transformer) * &#039;&#039;&#039;License:&#039;&#039;&#039; A...&quot;</title>
		<link rel="alternate" type="text/html" href="https://helixprojectai.com:443/wiki/index.php?title=HMI%E2%80%912025%E2%80%91G4SH_%E2%80%94_Granite%E2%80%914_Small%E2%80%91Hybrid&amp;diff=227&amp;oldid=prev"/>
		<updated>2025-10-13T13:24:54Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot; = HMI‑2025‑G4SH — Granite‑4 Small‑Hybrid (Helix Model Integration Sheet) = &amp;lt;blockquote&amp;gt;&amp;#039;&amp;#039;&amp;#039;Status:&amp;#039;&amp;#039;&amp;#039; READY FOR INTEGRATION • &amp;#039;&amp;#039;&amp;#039;Owner:&amp;#039;&amp;#039;&amp;#039; Helix Ops • &amp;#039;&amp;#039;&amp;#039;Last Updated:&amp;#039;&amp;#039;&amp;#039; 2025‑10‑13&amp;lt;/blockquote&amp;gt; ----  == 0) At‑a‑Glance ==  * &amp;#039;&amp;#039;&amp;#039;Model ID (Helix):&amp;#039;&amp;#039;&amp;#039; &amp;lt;code&amp;gt;granite4-small-h&amp;lt;/code&amp;gt; * &amp;#039;&amp;#039;&amp;#039;Upstream Name:&amp;#039;&amp;#039;&amp;#039; &amp;lt;code&amp;gt;ibm/granite4:small-h&amp;lt;/code&amp;gt; (aka &amp;#039;&amp;#039;&amp;#039;Granite 4.0 H‑Small&amp;#039;&amp;#039;&amp;#039;) * &amp;#039;&amp;#039;&amp;#039;Family:&amp;#039;&amp;#039;&amp;#039; IBM Granite 4.0 (hybrid Mamba‑2 + Transformer) * &amp;#039;&amp;#039;&amp;#039;License:&amp;#039;&amp;#039;&amp;#039; A...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;&lt;br /&gt;
= HMI‑2025‑G4SH — Granite‑4 Small‑Hybrid (Helix Model Integration Sheet) =&lt;br /&gt;
&amp;lt;blockquote&amp;gt;&amp;#039;&amp;#039;&amp;#039;Status:&amp;#039;&amp;#039;&amp;#039; READY FOR INTEGRATION • &amp;#039;&amp;#039;&amp;#039;Owner:&amp;#039;&amp;#039;&amp;#039; Helix Ops • &amp;#039;&amp;#039;&amp;#039;Last Updated:&amp;#039;&amp;#039;&amp;#039; 2025‑10‑13&amp;lt;/blockquote&amp;gt;&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 0) At‑a‑Glance ==&lt;br /&gt;
&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Model ID (Helix):&amp;#039;&amp;#039;&amp;#039; &amp;lt;code&amp;gt;granite4-small-h&amp;lt;/code&amp;gt;&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Upstream Name:&amp;#039;&amp;#039;&amp;#039; &amp;lt;code&amp;gt;ibm/granite4:small-h&amp;lt;/code&amp;gt; (aka &amp;#039;&amp;#039;&amp;#039;Granite 4.0 H‑Small&amp;#039;&amp;#039;&amp;#039;)&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Family:&amp;#039;&amp;#039;&amp;#039; IBM Granite 4.0 (hybrid Mamba‑2 + Transformer)&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;License:&amp;#039;&amp;#039;&amp;#039; Apache‑2.0 (open weights)&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Params (total / active):&amp;#039;&amp;#039;&amp;#039; ~32B total / ~9B active (hybrid MoE)&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Context:&amp;#039;&amp;#039;&amp;#039; long‑context capable via SSM layers; temperature 0 recommended for most inference&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Quantized Builds:&amp;#039;&amp;#039;&amp;#039; Q4_K_M (~19 GB), Q5_K_M (~23 GB) as GGUF/ollama variants&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Targets:&amp;#039;&amp;#039;&amp;#039; Low‑latency enterprise assistants, RAG, tool‑use, code &amp;amp; ops copilots&lt;br /&gt;
&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 1) Provenance &amp;amp; Verification ==&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;Sources:&amp;#039;&amp;#039;&amp;#039; IBM Granite 4.0 model cards/repos + Ollama registry entries. Record the exact artifact you deploy.&lt;br /&gt;
&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;Record on deploy:&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
&lt;br /&gt;
* &amp;lt;code&amp;gt;source_url&amp;lt;/code&amp;gt;: (HF/GitHub/Ollama)&lt;br /&gt;
* &amp;lt;code&amp;gt;artifact_sha256&amp;lt;/code&amp;gt;: (computed locally)&lt;br /&gt;
* &amp;lt;code&amp;gt;artifact_size_bytes&amp;lt;/code&amp;gt;: (from filesystem)&lt;br /&gt;
* &amp;lt;code&amp;gt;pull_command&amp;lt;/code&amp;gt;: (&amp;lt;code&amp;gt;ollama pull ibm/granite4:small-h&amp;lt;/code&amp;gt; or HF path)&lt;br /&gt;
* &amp;lt;code&amp;gt;signed_by&amp;lt;/code&amp;gt;: (if signed; attach attestation if present)&lt;br /&gt;
* &amp;lt;code&amp;gt;helix_proof_id&amp;lt;/code&amp;gt;: (SHA‑256 of this HMI file)&lt;br /&gt;
&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;Hashing procedure:&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
 &amp;lt;code&amp;gt;# After pull/export to file (example path):&lt;br /&gt;
 sha256sum /opt/models/ibm/granite4-small-h.gguf | tee /opt/helix/proofs/models/granite4-small-h.sha256&amp;lt;/code&amp;gt;&lt;br /&gt;
Attach the resulting &amp;lt;code&amp;gt;.sha256&amp;lt;/code&amp;gt; to the deployment record and reference it from TTD consent.&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 2) Compatibility Matrix (Helix Runtimes) ==&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot;&lt;br /&gt;
!Runtime&lt;br /&gt;
!Status&lt;br /&gt;
!Notes&lt;br /&gt;
|-&lt;br /&gt;
|&amp;#039;&amp;#039;&amp;#039;Ollama&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
|✅ Primary&lt;br /&gt;
|Official &amp;lt;code&amp;gt;ibm/granite4:small-h&amp;lt;/code&amp;gt; images; multiple quantizations&lt;br /&gt;
|-&lt;br /&gt;
|&amp;#039;&amp;#039;&amp;#039;vLLM&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
|✅&lt;br /&gt;
|Use HF checkpoint; ensure Mamba‑2 support flags enabled&lt;br /&gt;
|-&lt;br /&gt;
|&amp;#039;&amp;#039;&amp;#039;Text‑Gen Inference (TGI)&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
|✅&lt;br /&gt;
|Load via transformers w/ Mamba‑2 kernels&lt;br /&gt;
|-&lt;br /&gt;
|&amp;#039;&amp;#039;&amp;#039;LM Studio&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
|✅&lt;br /&gt;
|H‑Small listed; for local eval&lt;br /&gt;
|}&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 3) Helix Registry Entry (YAML) ==&lt;br /&gt;
 &amp;lt;code&amp;gt;# /opt/helix/registry/models/granite4-small-h.yaml&lt;br /&gt;
 model_id: granite4-small-h&lt;br /&gt;
 family: granite&lt;br /&gt;
 version: &amp;quot;4.0&amp;quot;&lt;br /&gt;
 upstream: ibm/granite4:small-h&lt;br /&gt;
 license: Apache-2.0&lt;br /&gt;
 architecture:&lt;br /&gt;
   type: hybrid&lt;br /&gt;
   mix: [transformer, mamba2]&lt;br /&gt;
   moe:&lt;br /&gt;
     total_parameters_b: 32&lt;br /&gt;
     active_parameters_b: 9&lt;br /&gt;
 context:&lt;br /&gt;
   recommended_temperature: 0&lt;br /&gt;
   max_new_tokens_default: 512&lt;br /&gt;
 quantizations:&lt;br /&gt;
   - name: Q4_K_M&lt;br /&gt;
     approx_size_gb: 19&lt;br /&gt;
   - name: Q5_K_M&lt;br /&gt;
     approx_size_gb: 23&lt;br /&gt;
 capabilities:&lt;br /&gt;
   instruction_following: strong&lt;br /&gt;
   tool_use: strong&lt;br /&gt;
   code: medium&lt;br /&gt;
   languages: [en, de, es, fr, ja, pt, ar, cs, it, ko, nl, zh]&lt;br /&gt;
 qsr_profile: granite4-small-h-2025Q4&lt;br /&gt;
 policy:&lt;br /&gt;
   pii_scrub: helix-default&lt;br /&gt;
   safety_tier: standard&lt;br /&gt;
   audit_headers: true&amp;lt;/code&amp;gt;&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 4) TTD/Helix Audit Headers ==&lt;br /&gt;
Add these headers (or JSON fields) to each generation record emitted by the router.&lt;br /&gt;
 &amp;lt;code&amp;gt;{&lt;br /&gt;
   &amp;quot;model&amp;quot;: &amp;quot;granite4-small-h&amp;quot;,&lt;br /&gt;
   &amp;quot;vendor&amp;quot;: &amp;quot;ibm&amp;quot;,&lt;br /&gt;
   &amp;quot;version&amp;quot;: &amp;quot;4.0&amp;quot;,&lt;br /&gt;
   &amp;quot;artifact_sha256&amp;quot;: &amp;quot;&amp;lt;fill-from-provenance&amp;gt;&amp;quot;,&lt;br /&gt;
   &amp;quot;quantization&amp;quot;: &amp;quot;Q4_K_M|Q5_K_M|bf16&amp;quot;,&lt;br /&gt;
   &amp;quot;inference_stack&amp;quot;: &amp;quot;ollama|vllm|tgi&amp;quot;,&lt;br /&gt;
   &amp;quot;router&amp;quot;: &amp;quot;helix-ttd-shim/&amp;gt;=1.3&amp;quot;,&lt;br /&gt;
   &amp;quot;x_granite_proof&amp;quot;: {&lt;br /&gt;
     &amp;quot;source&amp;quot;: &amp;quot;ollama|hf|github&amp;quot;,&lt;br /&gt;
     &amp;quot;pulled_at&amp;quot;: &amp;quot;&amp;lt;iso8601&amp;gt;&amp;quot;,&lt;br /&gt;
     &amp;quot;attestation&amp;quot;: &amp;quot;&amp;lt;optional-blob-or-url&amp;gt;&amp;quot;&lt;br /&gt;
   }&lt;br /&gt;
 }&amp;lt;/code&amp;gt;&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 5) QSR Defaults (Helix Quality Score Rubric) ==&lt;br /&gt;
Use these as initial thresholds; tune with live telemetry over the first week.&lt;br /&gt;
 &amp;lt;code&amp;gt;# /opt/helix/qsr/profiles/granite4-small-h.yaml&lt;br /&gt;
 profile_id: granite4-small-h-2025Q4&lt;br /&gt;
 weights:&lt;br /&gt;
   coherence: 0.28&lt;br /&gt;
   accuracy: 0.26&lt;br /&gt;
   completeness: 0.18&lt;br /&gt;
   relevance: 0.18&lt;br /&gt;
   novelty: 0.10&lt;br /&gt;
 thresholds:&lt;br /&gt;
   soft_flag: 0.74     # trigger human‑in‑the‑loop note&lt;br /&gt;
   hard_block: 0.62    # route to rollback / alternative model&lt;br /&gt;
 mri_risk_tiers:&lt;br /&gt;
   low:    [0.80, 1.00]&lt;br /&gt;
   medium: [0.70, 0.80]   # upper bound exclusive&lt;br /&gt;
   high:   [0.00, 0.70]   # upper bound exclusive&lt;br /&gt;
 fallback_chain:&lt;br /&gt;
   - model: magnus-supernova&lt;br /&gt;
   - model: qwen3-7b&lt;br /&gt;
   - model: deepseek-coder&amp;lt;/code&amp;gt;&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;Calibration plan:&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
&lt;br /&gt;
* Run 200‑sample eval across Helix golden tasks (RAG, tool‑use, code review).&lt;br /&gt;
* Fit reliability diagram; adjust &amp;lt;code&amp;gt;soft_flag&amp;lt;/code&amp;gt; to equalize FP/FN at δ≤2%.&lt;br /&gt;
* Freeze for 7 days; revisit after first incident or drift &amp;gt;3%.&lt;br /&gt;
&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 6) Integration Steps ==&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;A) Pull &amp;amp; list (Ollama):&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
 &amp;lt;code&amp;gt;ollama pull ibm/granite4:small-h&lt;br /&gt;
 ollama list | grep granite4&amp;lt;/code&amp;gt;&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;B) Enable in Helix router:&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
 &amp;lt;code&amp;gt;sudo tee /opt/helix/router/models.d/granite4-small-h.json &amp;gt;/dev/null &amp;lt;&amp;lt;&amp;#039;JSON&amp;#039;&lt;br /&gt;
 {&lt;br /&gt;
   &amp;quot;route&amp;quot;: &amp;quot;granite4-small-h&amp;quot;,&lt;br /&gt;
   &amp;quot;backend&amp;quot;: &amp;quot;ollama&amp;quot;,&lt;br /&gt;
   &amp;quot;model&amp;quot;: &amp;quot;ibm/granite4:small-h&amp;quot;,&lt;br /&gt;
   &amp;quot;parameters&amp;quot;: {&amp;quot;temperature&amp;quot;: 0, &amp;quot;num_ctx&amp;quot;: 32768}&lt;br /&gt;
 }&lt;br /&gt;
 JSON&lt;br /&gt;
 sudo systemctl restart helix-router&amp;lt;/code&amp;gt;&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;C) Register proofs:&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
 &amp;lt;code&amp;gt;helix-stats record --model granite4-small-h --source ollama --hash-file /opt/helix/proofs/models/granite4-small-h.sha256&amp;lt;/code&amp;gt;&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 7) RAG / Tool‑Use Settings ==&lt;br /&gt;
&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;RAG:&amp;#039;&amp;#039;&amp;#039; prefer temperature 0, &amp;lt;code&amp;gt;top_p=0.9&amp;lt;/code&amp;gt;, &amp;lt;code&amp;gt;max_new_tokens=512&amp;lt;/code&amp;gt;; penalize repetition &amp;gt;1.1&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Tool‑calling:&amp;#039;&amp;#039;&amp;#039; enable JSON‑mode; enforce schema; set &amp;lt;code&amp;gt;tool_timeout_ms=8000&amp;lt;/code&amp;gt;&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Memory windows:&amp;#039;&amp;#039;&amp;#039; use 2‑phase: compressive memory for history &amp;gt;12k tokens; emit summarization proofs into &amp;lt;code&amp;gt;ttd_memory_v2&amp;lt;/code&amp;gt;&lt;br /&gt;
&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;JSON schema example:&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
 &amp;lt;code&amp;gt;{&lt;br /&gt;
   &amp;quot;name&amp;quot;: &amp;quot;lookup_kb&amp;quot;,&lt;br /&gt;
   &amp;quot;strict&amp;quot;: true,&lt;br /&gt;
   &amp;quot;parameters&amp;quot;: {&lt;br /&gt;
     &amp;quot;type&amp;quot;: &amp;quot;object&amp;quot;,&lt;br /&gt;
     &amp;quot;properties&amp;quot;: {&lt;br /&gt;
       &amp;quot;query&amp;quot;: {&amp;quot;type&amp;quot;: &amp;quot;string&amp;quot;},&lt;br /&gt;
       &amp;quot;top_k&amp;quot;: {&amp;quot;type&amp;quot;: &amp;quot;integer&amp;quot;, &amp;quot;minimum&amp;quot;: 1, &amp;quot;maximum&amp;quot;: 10}&lt;br /&gt;
     },&lt;br /&gt;
     &amp;quot;required&amp;quot;: [&amp;quot;query&amp;quot;],&lt;br /&gt;
     &amp;quot;additionalProperties&amp;quot;: false&lt;br /&gt;
   }&lt;br /&gt;
 }&amp;lt;/code&amp;gt;&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 8) Smoke Test (Helix) ==&lt;br /&gt;
 &amp;lt;code&amp;gt;curl -fsS &amp;lt;nowiki&amp;gt;http://127.0.0.1:9010/chat&amp;lt;/nowiki&amp;gt; -H &amp;#039;content-type: application/json&amp;#039; -d &amp;#039;{&lt;br /&gt;
   &amp;quot;route&amp;quot;: &amp;quot;granite4-small-h&amp;quot;,&lt;br /&gt;
   &amp;quot;messages&amp;quot;: [&lt;br /&gt;
     {&amp;quot;role&amp;quot;:&amp;quot;system&amp;quot;,&amp;quot;content&amp;quot;:&amp;quot;You are a precise Helix assistant. Answer in JSON.&amp;quot;},&lt;br /&gt;
     {&amp;quot;role&amp;quot;:&amp;quot;user&amp;quot;,&amp;quot;content&amp;quot;:&amp;quot;Summarize the Helix Core Ethos in 3 bullets.&amp;quot;}&lt;br /&gt;
   ],&lt;br /&gt;
   &amp;quot;tools&amp;quot;: [],&lt;br /&gt;
   &amp;quot;trace&amp;quot;: true&lt;br /&gt;
 }&amp;#039; | jq &amp;#039;. | {text: .choices[0].message.content, audit: .audit}&amp;#039;&amp;lt;/code&amp;gt;&lt;br /&gt;
Expected: JSON reply ≤ 120 tokens; audit block present with model hash and quantization.&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 9) Observability &amp;amp; KPIs ==&lt;br /&gt;
&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Latency P50/P95:&amp;#039;&amp;#039;&amp;#039; ≤ 800 ms / ≤ 1.8 s at 1k tokens on L40S (target)&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Tool‑call success rate:&amp;#039;&amp;#039;&amp;#039; ≥ 96%&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;RAG groundedness (auto‑eval):&amp;#039;&amp;#039;&amp;#039; ≥ 0.88&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Incident budget:&amp;#039;&amp;#039;&amp;#039; ≤ 1 hard‑block per 5k calls weekly&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Drift trigger:&amp;#039;&amp;#039;&amp;#039; QSR moving average falls by ≥ 3% over 24h&lt;br /&gt;
&lt;br /&gt;
Export Prometheus metrics under &amp;lt;code&amp;gt;helix_granite4_small_h_*&amp;lt;/code&amp;gt; (latency, tokens, qsr, blocks, fallbacks).&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 10) Risk &amp;amp; Policy Notes ==&lt;br /&gt;
&lt;br /&gt;
* Hybrid MoE/SSM can surface &amp;#039;&amp;#039;&amp;#039;long‑context carryover errors&amp;#039;&amp;#039;&amp;#039;; reset memory at task boundaries.&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;JSON‑mode hallucination&amp;#039;&amp;#039;&amp;#039;: enforce strict schemas; reject extra fields.&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;PII&amp;#039;&amp;#039;&amp;#039;: apply Helix PII scrubber pre‑ and post‑gen; route hits to human review.&lt;br /&gt;
* &amp;#039;&amp;#039;&amp;#039;Rollback:&amp;#039;&amp;#039;&amp;#039; pre‑stage &amp;lt;code&amp;gt;magnus-supernova&amp;lt;/code&amp;gt; + &amp;lt;code&amp;gt;qwen3-7b&amp;lt;/code&amp;gt;; automatic switchover on &amp;lt;code&amp;gt;hard_block&amp;lt;/code&amp;gt;.&lt;br /&gt;
&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 11) Rollout Plan ==&lt;br /&gt;
&lt;br /&gt;
# &amp;#039;&amp;#039;&amp;#039;Stage&amp;#039;&amp;#039;&amp;#039; (dev): canary 5% of RAG traffic for 24h → compare KPIs.&lt;br /&gt;
# &amp;#039;&amp;#039;&amp;#039;Pilot&amp;#039;&amp;#039;&amp;#039; (prod shadow): mirror 10% queries; human‑only consumption.&lt;br /&gt;
# &amp;#039;&amp;#039;&amp;#039;Prod&amp;#039;&amp;#039;&amp;#039;: ramp 10% → 25% → 50% with guardrails; freeze if incident budget breached.&lt;br /&gt;
&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;Change ticket:&amp;#039;&amp;#039;&amp;#039; HMI‑2025‑G4SH‑ROLLOUT‑001&lt;br /&gt;
----&lt;br /&gt;
&lt;br /&gt;
== 12) Appendices ==&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;A) Incantations&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
 &amp;lt;code&amp;gt;# Quantized alt pulls&lt;br /&gt;
 ollama pull ibm/granite4:small-h-q4_K_M&lt;br /&gt;
 ollama pull ibm/granite4:small-h-q5_K_M&lt;br /&gt;
 &lt;br /&gt;
 # vLLM (HF)&lt;br /&gt;
 python - &amp;lt;&amp;lt;&amp;#039;PY&amp;#039;&lt;br /&gt;
 from vllm import LLM&lt;br /&gt;
 llm = LLM(model=&amp;quot;ibm-granite/granite-4.0-h-small&amp;quot;, dtype=&amp;quot;bfloat16&amp;quot;)&lt;br /&gt;
 print(llm.generate([&amp;quot;Hello Granite 4!&amp;quot;]))&lt;br /&gt;
 PY&amp;lt;/code&amp;gt;&lt;br /&gt;
&amp;#039;&amp;#039;&amp;#039;B) Consent stub (TTD)&amp;#039;&amp;#039;&amp;#039;&lt;br /&gt;
 &amp;lt;code&amp;gt;{&lt;br /&gt;
   &amp;quot;consent_id&amp;quot;: &amp;quot;ttd-consent-granite4-small-h-2025-10-13&amp;quot;,&lt;br /&gt;
   &amp;quot;subject&amp;quot;: &amp;quot;Deployment of IBM Granite 4.0 H-Small in Helix&amp;quot;,&lt;br /&gt;
   &amp;quot;artifacts&amp;quot;: [&lt;br /&gt;
     {&amp;quot;name&amp;quot;: &amp;quot;granite4-small-h.gguf&amp;quot;, &amp;quot;sha256&amp;quot;: &amp;quot;&amp;lt;fill&amp;gt;&amp;quot;, &amp;quot;source&amp;quot;: &amp;quot;ollama&amp;quot;}&lt;br /&gt;
   ],&lt;br /&gt;
   &amp;quot;approvers&amp;quot;: [&amp;quot;owner:helix&amp;quot;,&amp;quot;safety_champion&amp;quot;],&lt;br /&gt;
   &amp;quot;effective_from&amp;quot;: &amp;quot;2025-10-13T00:00:00Z&amp;quot;,&lt;br /&gt;
   &amp;quot;notes&amp;quot;: &amp;quot;Apache-2.0; hybrid mamba2+transformer; long-context tasks&amp;quot;&lt;br /&gt;
 }&amp;lt;/code&amp;gt;&lt;br /&gt;
— End of HMI‑2025‑G4SH —&lt;/div&gt;</summary>
		<author><name>Steve Helix</name></author>
	</entry>
</feed>