KB-3022

FIX7 P0 Shaped-Clone CI-Gate — ci_seal_vs_bytes_gate.py

9 min read Revision 1
tool-kiem-thufix7p0production-shaped-cloneci-gate-packet2026-06-12

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

""" FIX7 P0 - CI seal-vs-bytes gate :: reference, fail-closed implementation.

DESIGN ARTIFACT ONLY. This is a local/static reference implementation of the CI gate proposed in ci-seal-vs-bytes-gate-design.md. It triggers NO CI, touches NO production, and changes NO secrets. It is meant to be reviewed and, if the owner adopts it, wired in a throwaway non-production branch first.

Core rule (the whole point):

  • Read each sealed file in BINARY mode.
  • Hash the RAW on-disk bytes. Never decode -> re-encode -> re-serialize.
  • Compare BOTH sha256 AND exact byte length against the committed seal manifest.
  • A semantic-equivalent but byte-different file (e.g. JSON re-dumped with ensure_ascii flipped, an em-dash that became an ASCII hyphen, a BOM, CRLF, or a trailing-newline change) MUST fail. Equal-logical-value is NOT equal-bytes.
  • On ANY mismatch or missing file: exit non-zero (blocks merge/deploy). The gate never prints a PASS token on mismatch.

Seal manifest format (one record per line, '#' comments allowed):
    <sha256> <byte_len> <relpath>

Usage:
    ci_seal_vs_bytes_gate.py --manifest seal-manifest.sha256 --root <dir>
    ci_seal_vs_bytes_gate.py --selftest
"""
import hashlib
import json
import os
import sys

def sha256_file_bytes(path):
    """Return ``(sha256_hexdigest, byte_length)`` for the raw bytes stored at *path*.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so nothing is decoded or newline-normalized and memory use stays
    bounded no matter how large the sealed file is.

    Raises:
        OSError: if *path* cannot be opened or read.
    """
    chunk_size = 1 << 20  # 1 MiB per read keeps peak memory flat
    digest = hashlib.sha256()
    total = 0
    with open(path, "rb") as fh:  # binary: no decode, no normalization
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest(), total

def parse_manifest(text):
    """Parse seal-manifest text into a list of records.

    Every line that is neither blank nor a ``#`` comment must read
    ``<sha256> <byte_len> <relpath>``, separated by runs of whitespace.
    Everything after the second field is the relpath, so a relpath may
    contain interior spaces.

    Returns:
        ``(records, None)`` on success. Each record is a dict with the keys
        ``"sha256"`` (64 lower-case hex characters), ``"byte_len"`` (a
        non-negative int) and ``"relpath"`` (str).
        ``(None, message)`` as soon as one line is malformed; parsing stops
        at the first bad line.
    """
    hex_digits = frozenset("0123456789abcdef")
    records = []
    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith("#"):
            continue
        parts = line.split(None, 2)
        if len(parts) != 3:
            return None, "MALFORMED_MANIFEST_LINE: " + raw
        sha, length, rel = parts
        try:
            length = int(length)
        except ValueError:
            return None, "MALFORMED_LENGTH: " + raw
        if length < 0:
            # A file can never have a negative size; reject it here instead
            # of letting it surface later as a confusing length mismatch.
            return None, "MALFORMED_LENGTH: " + raw
        # hashlib emits lower-case hex, so normalise before validating and
        # storing; an upper-case digest names the same hash.
        sha = sha.lower()
        if len(sha) != 64 or not hex_digits.issuperset(sha):
            return None, "MALFORMED_SHA256: " + raw
        records.append({"sha256": sha, "byte_len": length, "relpath": rel})
    return records, None

def verify(manifest_path, root):
    """Check every record of the seal manifest against the bytes under *root*.

    Args:
        manifest_path: path of the seal manifest, read as UTF-8 text.
        root: directory onto which each record's relpath is joined. The
            relpath is joined as-is; absolute or ``..`` relpaths are not
            rejected here.

    Returns:
        A list of failure strings. An empty list means every sealed file
        matched both its sealed byte length and its sealed sha256. Possible
        entries: ``MISSING_MANIFEST``, ``UNREADABLE_MANIFEST``, a manifest
        parse error, ``EMPTY_MANIFEST``, ``MISSING_SEALED_FILE``,
        ``UNREADABLE_SEALED_FILE``, ``BYTE_LENGTH_MISMATCH`` and
        ``SHA256_MISMATCH``. I/O problems are reported as failures rather
        than raised, so the caller always gets a fail-closed verdict.
    """
    failures = []

    # EAFP: open directly instead of exists()-then-open, so a directory, a
    # permission problem or undecodable bytes become a failure record rather
    # than an uncaught traceback.
    try:
        with open(manifest_path, "r", encoding="utf-8") as fh:
            manifest_text = fh.read()
    except (FileNotFoundError, NotADirectoryError):
        return ["MISSING_MANIFEST:" + manifest_path]
    except (OSError, ValueError) as exc:  # ValueError covers UnicodeDecodeError
        return ["UNREADABLE_MANIFEST:%s (%s)" % (manifest_path, type(exc).__name__)]

    records, err = parse_manifest(manifest_text)
    if err:
        return [err]
    if not records:
        return ["EMPTY_MANIFEST"]  # empty manifest is itself a fail-closed condition

    for rec in records:
        rel = rec["relpath"]
        fpath = os.path.join(root, rel)
        try:
            actual_sha, actual_len = sha256_file_bytes(fpath)
        except (FileNotFoundError, NotADirectoryError):
            failures.append("MISSING_SEALED_FILE:" + rel)
            continue
        except (OSError, ValueError) as exc:
            # e.g. the path is a directory, is unreadable, or holds a NUL byte
            failures.append("UNREADABLE_SEALED_FILE:%s (%s)"
                            % (rel, type(exc).__name__))
            continue
        # Length and hash are reported independently so the log shows both.
        if actual_len != rec["byte_len"]:
            failures.append("BYTE_LENGTH_MISMATCH:%s (sealed %d, actual %d)"
                            % (rel, rec["byte_len"], actual_len))
        if actual_sha != rec["sha256"]:
            failures.append("SHA256_MISMATCH:" + rel)
    return failures

# ---------------------------------------------------------------------------
# Selftest: prove the gate fails closed on every drift class the macro names.
# Fixtures live in a throwaway temporary directory that is removed on exit;
# the only file left behind is the JSON result written next to this script.
# ---------------------------------------------------------------------------

def _write(path, data_bytes):
    """Create or truncate *path* and store *data_bytes* verbatim (binary mode)."""
    sink = open(path, "wb")
    try:
        sink.write(data_bytes)
    finally:
        # Always release the handle, even if the write itself fails.
        sink.close()

def selftest():
    """Run the gate against throwaway fixtures and check it fails closed.

    A small "sealed" file and its manifest are created in a temporary
    directory. One case confirms byte-identical content passes; every other
    case mutates or removes the file and expects ``verify`` to report at
    least one failure. A ``[PASS]``/``[FAIL]`` line is printed per case,
    then a summary line, and a JSON result is written next to this script.
    If that JSON cannot be written, a warning goes to stderr and the verdict
    is still returned.

    Returns:
        True when every case behaved as expected, otherwise False.
    """
    import tempfile

    results = []
    with tempfile.TemporaryDirectory(prefix="fix7-ci-gate-selftest.") as d:
        # The canonical sealed file (stand-in for the P7-pinned canonicalizer body).
        good = "FIX7 canon body: range 442-461 sealed; pin held.\n".encode("utf-8")
        gpath = os.path.join(d, "canon.md")
        _write(gpath, good)
        gsha = hashlib.sha256(good).hexdigest()
        man = os.path.join(d, "seal-manifest.sha256")
        with open(man, "w", encoding="utf-8") as fh:
            fh.write("# sha256 byte_len relpath\n")
            fh.write("%s %d canon.md\n" % (gsha, len(good)))

        def drift_case(name, mutated):
            # Swap in the drifted bytes, expect >= 1 failure, then restore.
            _write(gpath, mutated)
            found = verify(man, d)
            results.append((name, len(found) > 0, found))
            _write(gpath, good)

        # Case A: byte-identical -> PASS (no failures).
        fa = verify(man, d)
        results.append(("byte_identical_passes", len(fa) == 0, fa))

        # Case B: content edited at the same byte length -> sha mismatch -> FAIL.
        drift_case(
            "edited_content_fails",
            "FIX7 canon body: range 442-461 EDITED; pin held.\n".encode("utf-8"),
        )

        # Case C: ASCII hyphen -> U+2014 em dash -> different bytes -> FAIL.
        # Written as an escape so this source file cannot itself suffer the
        # encoding drift it is testing for.
        drift_case(
            "emdash_unicode_drift_fails",
            "FIX7 canon body: range 442\u2014461 sealed; pin held.\n".encode("utf-8"),
        )

        # Case D: ensure_ascii re-encode of equal-logical JSON -> different bytes -> FAIL.
        objpath = os.path.join(d, "obj.json")
        raw = json.dumps({"r": "442\u2014461"}, ensure_ascii=False).encode("utf-8")
        esc = json.dumps({"r": "442\u2014461"}, ensure_ascii=True).encode("utf-8")
        _write(objpath, raw)
        osha = hashlib.sha256(raw).hexdigest()
        man2 = os.path.join(d, "m2.sha256")
        with open(man2, "w", encoding="utf-8") as fh:
            fh.write("%s  %d  obj.json\n" % (osha, len(raw)))
        # now replace with the escaped (semantically equal) bytes -> must FAIL
        _write(objpath, esc)
        fd = verify(man2, d)
        results.append(("json_ensure_ascii_drift_fails", len(fd) > 0, fd))

        # Case E: BOM prepended -> different bytes -> FAIL.
        drift_case("bom_prefix_fails", b"\xef\xbb\xbf" + good)

        # Case F: CRLF line-ending drift -> different bytes -> FAIL.
        drift_case("crlf_drift_fails", good.replace(b"\n", b"\r\n"))

        # Case H: trailing newline dropped -> different bytes -> FAIL.
        drift_case("trailing_newline_drift_fails", good.rstrip(b"\n"))

        # Case G: missing sealed file -> FAIL.
        os.remove(gpath)
        fg = verify(man, d)
        results.append(("missing_file_fails", len(fg) > 0, fg))

    all_ok = all(ok for _, ok, _ in results)
    for name, ok, detail in results:
        print("[%s] %s%s" % ("PASS" if ok else "FAIL", name,
                             "" if ok else " -> UNEXPECTED: " + str(detail)))
    print("CI_SEAL_VS_BYTES_GATE_SELFTEST: %s" % ("PASS" if all_ok else "FAIL"))

    # Emit a machine result next to the script for the packet.
    out = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                       "ci-seal-vs-bytes-gate-selftest-result.json")
    try:
        with open(out, "w", encoding="utf-8") as fh:
            json.dump({
                "doc": "fix7-p0-ci-seal-vs-bytes-gate-selftest-result",
                "date": "2026-06-12",
                "scope": "LOCAL_STATIC_ONLY_NO_CI_TRIGGER",
                "cases": [{"case": n, "fail_closed_as_expected": ok} for n, ok, _ in results],
                "all_fail_closed": all_ok,
                "ci_triggered": False,
                "production_contact": False,
            }, fh, indent=2, sort_keys=True, ensure_ascii=True)
            fh.write("\n")
    except OSError as exc:
        # A read-only checkout must not mask the verdict computed above.
        print("WARN: could not write selftest result %s: %s" % (out, exc),
              file=sys.stderr)
    return all_ok

def main():
    """Command-line entry point; always terminates through ``sys.exit``.

    Exit status: 0 when the gate (or the selftest) passes, 1 when it fails,
    2 when no manifest was supplied.
    """
    if "--selftest" in sys.argv:
        passed = selftest()
        sys.exit(0 if passed else 1)

    cli = sys.argv[1:]
    manifest = None
    root = "."
    # Walk adjacent (flag, value) pairs; a flag in last position has no value
    # and is ignored, and a later occurrence overrides an earlier one.
    for flag, value in zip(cli, cli[1:]):
        if flag == "--manifest":
            manifest = value
        elif flag == "--root":
            root = value

    if not manifest:
        print("usage: ci_seal_vs_bytes_gate.py --manifest <file> --root <dir> | --selftest")
        sys.exit(2)

    failures = verify(manifest, root)
    if not failures:
        print("CI_SEAL_VS_BYTES_GATE: PASS (all sealed bytes byte-exact)")
        sys.exit(0)

    # The PASS token is never printed on this path.
    for problem in failures:
        print("SEAL_GATE_FAIL: " + problem)
    print("CI_SEAL_VS_BYTES_GATE: FAIL (%d) -> BLOCK" % len(failures))
    sys.exit(1)

# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Back to Knowledge Hub knowledge/dev/reports/architecture/fix7-p0-production-shaped-clone-rehearsal-ci-gate-packet-2026-06-12/ci_seal_vs_bytes_gate.py