KB-CFFE rev 11

dot-context-pack-build.sh rev 11 (Đ43 audit R2: Fix A+B)

60 min read Revision 11
dieu43context-packaudit-r2

#!/usr/bin/env bash

=============================================================================

dot-context-pack-build — Context Pack Builder (Đ43 "động cơ chính")

=============================================================================

@version 0.1-skeleton

@date 2026-04-17

@author Claude CLI (claude-go), S178 Fix 12 Phase 4a P1+P2

@spec Điều 43 v1.2 FINAL rev 6 §6 (8 bước) + §5 (schema rev 6 target_db) + §6.X CẤM HARDCODE

@paired-dot dot-context-pack-verify.sh (NT12 HP v4.6.2 — sẽ soạn Phase 4b)

@db-access directus (KHO): R/W context_pack_*, R dot_tools, normative_registry,

birth_registry, meta_catalog, dot_operations, dot_config

incomex_metadata (NÃO): R kb_documents (Phase 4a P4 phiên sau)

postgres catalog: pg_database (db_count khi whitelist rỗng)

@gateway psql TCP localhost:5432 (postgresql-client host-side, thay container

exec pattern post-TD-S178-23). Password từ /opt/incomex/secrets/

.env.production (Đ33 §14 SSOT, post-TD-S178-14).

Role directus (owner directus DB) cho write (PG_PASSWORD_RW)

Role context_pack_readonly cho read cross-DB (Phase 1.5, PG_PASSWORD_RO)

@exit-codes 0 OK / unchanged-skip / coalesced-skip

1 ERROR (runtime / validate / publish)

2 USAGE

3 PRECHECK_FAIL (PG / KB / infra)

4 LOCK_COALESCED (reserve, current maps to 0)

@scope Phase 4a P1+P2 = skeleton + Bước 1-4. Bước 5-8 Phase 4a P3+P4 phiên sau.

@bootstrap Đ33 §13 ngoại lệ — chạy manual trong dev, register dot_tools ở Phase 5.

=============================================================================

BƯỚC MAP (§6 Đ43):

Bước 1 precheck() — PG/KB health, git_commit 5-tier, on-deploy gate

Bước 2 try_lock() — pg_try_advisory_lock(43,1), coalesce-skip, INSERT request

Bước 3 query_pg() — law/dot/entity/species/db counts (reference tables + dot_config)

Bước 4 scan_fs() — loop dot_config.context_pack_scan_paths, mtime cache

Bước 5 generate() — render 8 section + 2 checksum (P3 phiên sau)

Bước 6 validate() — min/max size + format + compare-unchanged (P3 phiên sau)

Bước 7 publish() — 7a..7g 2-phase publish + repair (P4 phiên sau)

Bước 8 release() — pg_advisory_unlock, UPDATE request done (P4 phiên sau)

CẤM HARDCODE (§6.X): section list, folder, DB, pattern, threshold, path, size.

Mọi giá trị qua dot_config hoặc reference table PG hoặc catalog pg_*.

=============================================================================

set -euo pipefail

# -----------------------------------------------------------------------------
# Globals (readonly constants — not runtime config)
# -----------------------------------------------------------------------------
readonly VERSION="1.0-rev11-d43r6-audit-r2"
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME

# PG env vars (NOT readonly — populated by env_load() from .env.production).
# Required after env_load: PGHOST PGPORT PG_USER_RW PG_DB_MAIN PG_PASSWORD_RW
#                          PG_USER_RO PG_PASSWORD_RO PG_DB_NAO

# Advisory-lock namespace for the build (classid, objid) — see try_lock().
readonly LOCK_NS_BUILD_CLASSID=43
readonly LOCK_NS_BUILD_OBJID=1
readonly TMPDIR="${TMPDIR:-/tmp}"

# -----------------------------------------------------------------------------
# Runtime state (populated at parse_args + runtime)
# -----------------------------------------------------------------------------
DRY_RUN=0
REPAIR=0
VERBOSE=0
TRIGGER_SOURCE=""
# shellcheck disable=SC2034 # BUILD_ID set Stage B in try_lock, used Stage B/C/P3/P4
BUILD_ID=""
# shellcheck disable=SC2034 # REQUEST_ID set Stage B in try_lock, used Stage C/P4/P5
REQUEST_ID=""
LOCK_HELD=0
# shellcheck disable=SC2034 # OUTPUT_ROOT resolved Stage B from dot_config, used Stage B/C/P3/P4
OUTPUT_ROOT=""
GIT_COMMIT=""
DEDUPE_BUCKET=""
LAW_COUNT=0
DOT_COUNT=0
ENTITY_COUNT=0
SPECIES_COUNT=0
DB_COUNT=0
SCANNED_FILE_COUNT=0

-----------------------------------------------------------------------------

Logging — stdout INFO/OK/SKIP/DRY, stderr WARN/ERR/FATAL

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Logging — stdout INFO/OK/SKIP/DRY, stderr WARN/ERR/FATAL.
# All take the message as "$*" (export mangling had stripped the '*').
# -----------------------------------------------------------------------------
log_info()  { printf '[INFO] %s\n' "$*"; }
log_ok()    { printf '[OK] %s\n' "$*"; }
log_skip()  { printf '[SKIP] %s\n' "$*"; }
log_dry()   { printf '[DRY] %s\n' "$*"; }
log_debug() { if [[ $VERBOSE -eq 1 ]]; then printf '[DEBUG] %s\n' "$*"; fi; }
log_warn()  { printf '[WARN] %s\n' "$*" >&2; }
log_err()   { printf '[ERR] %s\n' "$*" >&2; }
# log_fatal also records LAST_ERR for the EXIT trap's release_failure path.
log_fatal() { LAST_ERR="$*"; printf '[FATAL] %s\n' "$*" >&2; }

# Stub for fn_log_issue (Đ35 v5.1 BLOCK 4 not yet enacted — TD-S178-12 retrofit later).
# $1 severity (default warn), $2 category (default generic), $3 summary.
log_issue() {
  local severity="${1:-warn}"
  local category="${2:-generic}"
  local summary="${3:-}"
  log_warn "log_issue STUB: severity=${severity} category=${category} summary=${summary}"
  return 0
}

-----------------------------------------------------------------------------

Cleanup trap — release advisory lock best-effort nếu hold

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Cleanup trap — release advisory lock best-effort if still held.
# Bước 8 retry path: if the build failed mid-flight (REQUEST_ID set but
# MANIFEST_ID not), call release_failure so the request row is updated to
# retry/failed. NOTE(review): release_failure is defined in a later phase of
# this file (P4) — confirm it exists before this trap can fire on that path.
# -----------------------------------------------------------------------------
on_exit() {
  local rc=$?
  if [[ $rc -ne 0 \
     && -n "${REQUEST_ID:-}" \
     && "${REQUEST_ID}" != "dry-run" \
     && "${REPAIR:-0}" -ne 1 \
     && -z "${MANIFEST_ID:-}" ]]; then
    log_debug "trap EXIT rc=${rc} → release_failure path"
    release_failure "${LAST_ERR:-pipeline exit ${rc}}" || true
  fi
  if [[ $LOCK_HELD -eq 1 ]]; then
    log_debug "trap EXIT: releasing advisory lock (${LOCK_NS_BUILD_CLASSID},${LOCK_NS_BUILD_OBJID})"
    # Best-effort unlock; errors deliberately suppressed (we are already exiting).
    PGPASSWORD="${PG_PASSWORD_RW:-}" psql -h "${PGHOST:-127.0.0.1}" -p "${PGPORT:-5432}" \
      -U "${PG_USER_RW:-directus}" -d "${PG_DB_MAIN:-directus}" -At \
      -c "SELECT pg_advisory_unlock(${LOCK_NS_BUILD_CLASSID}, ${LOCK_NS_BUILD_OBJID});" \
      >/dev/null 2>&1 || true
    LOCK_HELD=0
  fi
  return $rc
}
trap on_exit EXIT

on_err() { local line="${1:-?}" log_fatal "Error at line ${line} (exit $?)" exit 1 } trap 'on_err $LINENO' ERR

-----------------------------------------------------------------------------

PG helpers — TCP localhost (NT2 post-TD-S178-23: bỏ container-exec pattern;

user incomex không cần docker group). Password đọc từ env_load() → .env.production.

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# PG helpers — TCP localhost (NT2 post-TD-S178-23: container-exec pattern
# dropped; user incomex needs no docker group). Passwords come from env_load().
# -----------------------------------------------------------------------------
# run_pg_rw SQL — execute SQL on the main (RW) DB as the directus role.
# SQL is passed on stdin (here-string) to keep it off the argv/ps listing.
run_pg_rw() {
  PGPASSWORD="$PG_PASSWORD_RW" psql -h "$PGHOST" -p "$PGPORT" \
    -U "$PG_USER_RW" -d "$PG_DB_MAIN" -At -v ON_ERROR_STOP=1 <<< "$1"
}

Đ43 v1.2 rev 6 §5.7 target_db dispatch: caller truyền db name từ section_definitions

# run_pg_ro_db DB SQL — Đ43 v1.2 rev 6 §5.7 target_db dispatch: caller passes
# the database name taken from section_definitions; runs as the read-only role.
run_pg_ro_db() {
  local db="$1"
  local sql="$2"
  PGPASSWORD="$PG_PASSWORD_RO" psql -h "$PGHOST" -p "$PGPORT" \
    -U "$PG_USER_RO" -d "$db" -At -v ON_ERROR_STOP=1 <<< "$sql"
}

table_exists_in_db() { local db="$1" local tbl="$2" local result result="$(run_pg_ro_db "$db" "SELECT 1 FROM information_schema.tables WHERE table_schema='public' AND table_name='${tbl}' LIMIT 1" 2>/dev/null || true)" [[ "$result" == "1" ]] }

-----------------------------------------------------------------------------

Config loader — CẤM fallback hardcode (§6.X P2 CẤM fallback khi JSONB thiếu key)

-----------------------------------------------------------------------------

dot_config_get() { local key="$1" local value value="$(run_pg_rw "SELECT value FROM dot_config WHERE key='${key}'")" if [[ -z "$value" ]]; then log_fatal "dot_config key missing: ${key} (§6.X P2 CẤM fallback)" exit 1 fi printf '%s' "$value" }

-----------------------------------------------------------------------------

Env loader — Đ33 §14 SSOT /opt/incomex/secrets/.env.production

(TD-S178-14 RESOLVED: bỏ fallback legacy env path)

Exports: PGHOST PGPORT PG_USER_RW PG_DB_MAIN PG_PASSWORD_RW

PG_USER_RO PG_PASSWORD_RO PG_DB_NAO

AGENT_DATA_URL AGENT_DATA_API_KEY

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Env loader — Đ33 §14 SSOT /opt/incomex/secrets/.env.production
# (TD-S178-14 RESOLVED: legacy env-path fallback removed).
# Exports all keys from the file (set -a), then fail-fast verifies the
# required PG + KB keys; exits 3 (PRECHECK_FAIL) on any problem.
# -----------------------------------------------------------------------------
env_load() {
  local env_file="${ENV_FILE:-/opt/incomex/secrets/.env.production}"
  if [[ ! -r "$env_file" ]]; then
    log_fatal "env file not readable (Đ33 §14 SSOT): $env_file"
    exit 3
  fi
  set -a
  # shellcheck source=/dev/null
  source "$env_file"
  set +a
  # Fail-fast: required keys for PG + KB (§6.X no fallback)
  local missing=()
  local k
  for k in PGHOST PGPORT PG_USER_RW PG_DB_MAIN PG_PASSWORD_RW \
           PG_USER_RO PG_PASSWORD_RO PG_DB_NAO \
           AGENT_DATA_URL AGENT_DATA_API_KEY; do
    [[ -z "${!k:-}" ]] && missing+=("$k")
  done
  if [[ ${#missing[@]} -gt 0 ]]; then
    log_fatal "env file missing required keys: ${missing[*]} (file=${env_file})"
    exit 3
  fi
}

-----------------------------------------------------------------------------

git_commit 5-tier fallback (§6 Bước 1 giữ v1.1)

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# git_commit 5-tier fallback (§6 Bước 1, kept from v1.1). Sets GIT_COMMIT;
# never fails — tier 5 falls back to "unknown" with a WARN.
# -----------------------------------------------------------------------------
detect_git_commit() {
  # Tier 1: git rev-parse in /opt/incomex/*-repo/
  local repo sha
  for repo in /opt/incomex/*-repo; do
    [[ -d "$repo/.git" ]] || continue
    sha="$(git -C "$repo" rev-parse HEAD 2>/dev/null || true)"
    if [[ -n "$sha" ]]; then
      GIT_COMMIT="$sha"
      log_debug "git_commit tier1 from $repo"
      return 0
    fi
  done
  # Tier 2: /opt/incomex/RELEASE_VERSION file
  if [[ -r /opt/incomex/RELEASE_VERSION ]]; then
    GIT_COMMIT="$(tr -d '[:space:]' < /opt/incomex/RELEASE_VERSION)"
    if [[ -n "$GIT_COMMIT" ]]; then
      log_debug "git_commit tier2 from RELEASE_VERSION"
      return 0
    fi
  fi
  # Tier 3: env RELEASE_SHA (log names the source, not the value, to match
  # the other tiers — original logged the SHA itself)
  if [[ -n "${RELEASE_SHA:-}" ]]; then
    GIT_COMMIT="$RELEASE_SHA"
    log_debug "git_commit tier3 from RELEASE_SHA env"
    return 0
  fi
  # Tier 4: latest vps_deploy_log row (if the table exists)
  if table_exists_in_db "$PG_DB_MAIN" "vps_deploy_log"; then
    sha="$(run_pg_rw "SELECT git_sha FROM vps_deploy_log ORDER BY deployed_at DESC LIMIT 1" 2>/dev/null || true)"
    if [[ -n "$sha" ]]; then
      GIT_COMMIT="$sha"
      log_debug "git_commit tier4 from vps_deploy_log"
      return 0
    fi
  fi
  # Tier 5: unknown + WARN
  GIT_COMMIT="unknown"
  log_warn "git_commit=unknown (tier 5, all fallbacks exhausted — TD-S178-17 vps_deploy_log missing)"
}

-----------------------------------------------------------------------------

Đ41 §6.5 — on-deploy gate (chỉ chạy khi is_known_good=true)

-----------------------------------------------------------------------------

on_deploy_gate() { [[ "$TRIGGER_SOURCE" != "on_deploy" ]] && return 0 if ! table_exists_in_db "$PG_DB_MAIN" "vps_deploy_log"; then log_warn "on_deploy requested nhưng vps_deploy_log missing (TD-S178-17) — permissive skip gate, tiếp build" return 0 fi local known_good known_good="$(run_pg_rw "SELECT is_known_good FROM vps_deploy_log ORDER BY deployed_at DESC LIMIT 1" 2>/dev/null || echo 'f')" if [[ "$known_good" != "t" ]]; then log_skip "on_deploy: latest deploy is_known_good=${known_good} — exit 0 per Đ41 §6.5" exit 0 fi log_ok "on_deploy gate PASS (is_known_good=true)" }

-----------------------------------------------------------------------------

§5.1 Đ43 — trigger_source validate từ PG (NT2/NT4: CẤM hardcode 6 enum value).

Thêm trigger_source mới = INSERT context_trigger_sources, 0 sửa code.

-----------------------------------------------------------------------------

validate_trigger_source() { local hit hit="$(run_pg_rw "SELECT code FROM context_trigger_sources WHERE code = '${TRIGGER_SOURCE}'")" if [[ "$hit" != "$TRIGGER_SOURCE" ]]; then local valid valid="$(run_pg_rw "SELECT string_agg(code, '|' ORDER BY code) FROM context_trigger_sources")" log_fatal "trigger_source='${TRIGGER_SOURCE}' không có trong context_trigger_sources (hợp lệ: ${valid})" exit 2 fi }

-----------------------------------------------------------------------------

Đ43 §6.X NT2 — Python deps auto-verify + auto-install

Driven by dot_config.context_pack_python_deps (JSONB array). CẤM hardcode pkg list.

Thiếu → pip install --user --break-system-packages <pkg> → reimport verify.

pip fail hoặc import-after-install fail → exit 3 (PRECHECK_FAIL).

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Đ43 §6.X NT2 — Python deps auto-verify + auto-install.
# Driven by dot_config.context_pack_python_deps (JSONB array) — package list
# must not be hardcoded. Missing package → pip install --user
# --break-system-packages, then re-import to verify. Any failure → exit 3.
# NOTE(review): each entry is used both as pip package name AND import module
# name — confirm the configured names satisfy both.
# -----------------------------------------------------------------------------
check_python_deps() {
  local deps_json pkg
  deps_json="$(dot_config_get 'context_pack_python_deps')"

  if [[ -z "$deps_json" || "$deps_json" == "null" || "$deps_json" == "[]" ]]; then
    log_info "context_pack_python_deps empty — no Python deps required"
    return 0
  fi

  local missing=()
  while IFS= read -r pkg; do
    [[ -z "$pkg" ]] && continue
    if python3 -c "import ${pkg}" >/dev/null 2>&1; then
      log_ok "python dep present: ${pkg}"
    else
      missing+=("$pkg")
      log_warn "python dep missing: ${pkg} — will auto-install (NT2)"
    fi
  done < <(jq -r '.[]' <<< "$deps_json")

  [[ ${#missing[@]} -eq 0 ]] && return 0

  for pkg in "${missing[@]}"; do
    log_info "pip install --user --break-system-packages ${pkg}"
    if ! pip install --user --break-system-packages "$pkg" >/dev/null 2>&1; then
      log_fatal "pip install failed: ${pkg} (check network / user-site perms / PEP 668)"
      exit 3
    fi
    if ! python3 -c "import ${pkg}" >/dev/null 2>&1; then
      log_fatal "pip install OK but import still fails: ${pkg} (module name mismatch?)"
      exit 3
    fi
    log_ok "auto-installed + import verified: ${pkg}"
  done
}

-----------------------------------------------------------------------------

Usage

-----------------------------------------------------------------------------

# Print CLI usage to stdout (heredoc expands ${SCRIPT_NAME}).
usage() {
  cat <<USAGE
Usage: ${SCRIPT_NAME} [OPTIONS]

Đ43 v1.2 rev 6 §6 — Context pack builder (8 bước).

OPTIONS:
  --help, -h              In hướng dẫn này
  --dry-run               Chạy 8 bước KHÔNG ghi PG live / KB / FS live
  --trigger-source=<code> Nguồn trigger. Validate runtime từ PG
                          context_trigger_sources (§5.1 Đ43, NT2/NT4).
                          Default: on_demand
  --repair                Mode repair §6 Bước 7g (phát hiện state
                          post_fs_pre_db_finalize → finalize hoặc rollback)
  --build-id=<id>         Force build_id (debug / repair); default auto-generate
  --verbose               Debug log

EXIT CODES:
  0 OK (kể cả coalesced-skip / unchanged-skip)
  1 ERROR
  2 USAGE
  3 PRECHECK_FAIL
USAGE
}

-----------------------------------------------------------------------------

Argument parser

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Argument parser. Fix: the `--opt=value` case patterns had lost their `*`
# glob (`--trigger-source=)` matched only a bare `--trigger-source=`), so
# `--trigger-source=foo` fell through to the unknown-option error; same for
# --build-id=. Restored `=*)` patterns and `${1#*=}` value extraction.
# -----------------------------------------------------------------------------
# shellcheck disable=SC2034 # BUILD_ID / REQUEST_ID / OUTPUT_ROOT used in Stage B+C
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help|-h) usage; exit 0 ;;
      --dry-run) DRY_RUN=1; shift ;;
      --repair) REPAIR=1; shift ;;
      --verbose) VERBOSE=1; shift ;;
      --trigger-source=*) TRIGGER_SOURCE="${1#*=}"; shift ;;
      --trigger-source)
        [[ $# -lt 2 ]] && { log_err "--trigger-source requires value"; exit 2; }
        TRIGGER_SOURCE="$2"; shift 2 ;;
      --build-id=*) BUILD_ID="${1#*=}"; shift ;;
      --build-id)
        [[ $# -lt 2 ]] && { log_err "--build-id requires value"; exit 2; }
        BUILD_ID="$2"; shift 2 ;;
      *)
        log_err "Unknown option: $1"
        usage >&2
        exit 2 ;;
    esac
  done

  TRIGGER_SOURCE="${TRIGGER_SOURCE:-on_demand}"
  # §5.1 Đ43 enum validation runs later, in precheck() after env_load
  # (needs PG) — see validate_trigger_source().
}

=============================================================================

8 BƯỚC §6 — stubs (implementation tuần tự: Stage B = 1+2, Stage C = 3+4, P3+P4 = 5-8)

=============================================================================

-----------------------------------------------------------------------------

Bước 1 §6 — PRECHECK

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Bước 1 §6 — PRECHECK: env SSOT, PG/KB health, output folders, seeded
# operations, git_commit, on-deploy gate, Python deps. Exits 3 on infra fail.
# -----------------------------------------------------------------------------
precheck() {
  # 1.0 Env SSOT (Đ33 §14 /opt/incomex/secrets/.env.production)
  env_load
  log_ok "env loaded from SSOT (Đ33 §14)"

  # 1.0.1 Minimal PG health so validate_trigger_source can read its enum table
  if ! run_pg_rw "SELECT 1" >/dev/null 2>&1; then
    log_fatal "PG preflight fail trước validate_trigger_source (${PGHOST}:${PGPORT})"
    exit 3
  fi
  validate_trigger_source
  log_ok "trigger_source=${TRIGGER_SOURCE} valid (context_trigger_sources PG)"

  # 1.1 PG health (directus DB via TCP) — kept as a separate, fully-labelled
  # check after enum validation, matching the original step numbering
  if ! run_pg_rw "SELECT 1" >/dev/null 2>&1; then
    log_fatal "PG health fail (host=${PGHOST}:${PGPORT}, user=${PG_USER_RW}, db=${PG_DB_MAIN})"
    exit 3
  fi
  log_ok "PG healthy (${PG_USER_RW}@${PGHOST}:${PGPORT}/${PG_DB_MAIN})"

  # 1.2 OUTPUT_ROOT from dot_config (no hardcoding, §6.X)
  OUTPUT_ROOT="$(dot_config_get 'context_pack_output_root')"
  log_info "OUTPUT_ROOT=${OUTPUT_ROOT} (from dot_config)"

  # 1.3 Folder existence (Phase 2 created the 3 folders)
  local dir
  for dir in "$OUTPUT_ROOT" "${OUTPUT_ROOT}.tmp" "${OUTPUT_ROOT}-staging"; do
    if [[ ! -d "$dir" ]]; then
      log_fatal "required folder missing: ${dir} (rerun dot-dieu43-fs-init.sh Phase 2)"
      exit 3
    fi
  done
  log_ok "output folders exist: ${OUTPUT_ROOT}{,.tmp,-staging}"

  # 1.4 Lock dir writable (tmp folder is the lock-dir proxy; warn only)
  if [[ ! -w "${OUTPUT_ROOT}.tmp" ]]; then
    log_warn "lock dir not writable as $(whoami): ${OUTPUT_ROOT}.tmp (need docker/sudo for write in Bước 5)"
  else
    log_ok "lock dir writable: ${OUTPUT_ROOT}.tmp"
  fi

  # 1.5 dot_operations has CONTEXT_PACK_BUILD (Phase 1 migration seed §5.6)
  local op_count
  op_count="$(run_pg_rw "SELECT COUNT(*) FROM dot_operations WHERE code='CONTEXT_PACK_BUILD'")"
  if [[ "$op_count" != "1" ]]; then
    log_fatal "dot_operations missing CONTEXT_PACK_BUILD (count=${op_count}, expect 1) — Phase 1 migration incomplete"
    exit 3
  fi
  log_ok "dot_operations.CONTEXT_PACK_BUILD present"

  # 1.6 KB API health (env loaded at 1.0)
  local http_code
  http_code="$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 \
    -H "X-API-Key: ${AGENT_DATA_API_KEY}" \
    "${AGENT_DATA_URL}/health" 2>/dev/null || echo '000')"
  if [[ "$http_code" != "200" ]]; then
    log_fatal "KB API health fail HTTP=${http_code} (URL=${AGENT_DATA_URL})"
    exit 3
  fi
  log_ok "KB API healthy (${AGENT_DATA_URL})"

  # 1.7 git_commit 5-tier fallback
  detect_git_commit
  log_info "git_commit=${GIT_COMMIT}"

  # 1.8 on-deploy gate Đ41 §6.5
  on_deploy_gate

  # 1.9 Python deps (NT2: dot_config driven, no hardcoding §6.X)
  check_python_deps
}

-----------------------------------------------------------------------------

Bước 2 §6 — TRY-LOCK + coalesce-skip + INSERT request

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Bước 2 §6 — TRY-LOCK + coalesce-skip + INSERT request.
# Sets DEDUPE_BUCKET, LOCK_HELD, BUILD_ID, REQUEST_ID. Exits 0 on coalesce.
# -----------------------------------------------------------------------------
try_lock() {
  # 2.1 Compute dedupe_bucket (hourly, per trigger)
  DEDUPE_BUCKET="$(run_pg_rw "SELECT date_trunc('hour', now())::text")"
  log_debug "dedupe_bucket=${DEDUPE_BUCKET}"

  # 2.2 try acquire advisory lock (43, 1) — build namespace per §6 v1.1
  local got
  got="$(run_pg_rw "SELECT pg_try_advisory_lock(${LOCK_NS_BUILD_CLASSID}, ${LOCK_NS_BUILD_OBJID})")"

  if [[ "$got" != "t" ]]; then
    # 2.3a Busy → coalesce skip (§6 Bước 2 v1.1); record a skipped request row
    log_skip "advisory_lock(${LOCK_NS_BUILD_CLASSID},${LOCK_NS_BUILD_OBJID}) BUSY — coalesce skip"
    if [[ $DRY_RUN -eq 1 ]]; then
      log_dry "would INSERT request (trigger=${TRIGGER_SOURCE}, bucket=${DEDUPE_BUCKET}, status=skipped, reason=coalesced)"
    else
      run_pg_rw "INSERT INTO context_pack_requests (trigger_source, dedupe_bucket, status, detail) VALUES ('${TRIGGER_SOURCE}', '${DEDUPE_BUCKET}', 'skipped', '{\"reason\":\"coalesced\"}'::jsonb)" >/dev/null || true
      log_ok "coalesce-skip request inserted"
    fi
    exit 0
  fi

  # 2.3b Lock acquired
  LOCK_HELD=1
  log_ok "advisory_lock(${LOCK_NS_BUILD_CLASSID},${LOCK_NS_BUILD_OBJID}) acquired"

  # 2.4 Generate build_id unless --build-id was given
  if [[ -z "$BUILD_ID" ]]; then
    # Avoid tr|head SIGPIPE (141) under pipefail; use bash $RANDOM
    local _rand
    printf -v _rand '%04x%02x' "$RANDOM" "$((RANDOM % 256))"
    BUILD_ID="$(date -u +%Y%m%d-%H%M%S)-${_rand}"
  fi
  log_info "build_id=${BUILD_ID}"

  # 2.5 INSERT request status='running' (or reuse existing row in retry test)
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "would INSERT request (trigger=${TRIGGER_SOURCE}, bucket=${DEDUPE_BUCKET}, status=running, build_id=${BUILD_ID})"
    REQUEST_ID="dry-run"
  elif [[ -n "${DOT_TEST_REUSE_REQUEST_ID:-}" ]]; then
    # Test hook: reuse existing request row (simulates retry scheduler pickup)
    REQUEST_ID="$DOT_TEST_REUSE_REQUEST_ID"
    run_pg_rw "UPDATE context_pack_requests SET status='running', started_at=now(), detail = detail || jsonb_build_object('build_id','${BUILD_ID}','retry_attempt', (retry_count+1)) WHERE id=${REQUEST_ID}" >/dev/null
    log_warn "DOT_TEST_REUSE_REQUEST_ID=${REQUEST_ID} → reuse existing row (test hook, skip INSERT)"
  else
    REQUEST_ID="$(run_pg_rw "INSERT INTO context_pack_requests (trigger_source, dedupe_bucket, status, started_at, detail) VALUES ('${TRIGGER_SOURCE}', '${DEDUPE_BUCKET}', 'running', now(), jsonb_build_object('build_id', '${BUILD_ID}')) RETURNING id" | head -1)"
    log_ok "request_id=${REQUEST_ID} inserted (status=running)"
  fi
}

-----------------------------------------------------------------------------

Bước 3 §6 — QUERY PG (reference tables + dot_config whitelist + pg_database fallback)

Sources per prompt §3 Stage C + §6 Đ43 v1.2 rev 6:

law_count ← normative_registry

dot_count ← dot_tools

entity_count ← birth_registry

species_count ← meta_catalog

db_count ← dot_config.context_pack_scan_db_whitelist

rỗng → pg_database catalog exclude (postgres,template0,template1) [NT11]

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Bước 3 §6 — QUERY PG: law/dot/entity/species counts from the reference
# tables; db_count from dot_config whitelist, falling back to the pg_database
# catalog (NT11) when the whitelist is empty.
# Fix: the COUNT queries had lost their '*' (markdown mangling) — COUNT()
# is invalid SQL; restored COUNT(*).
# -----------------------------------------------------------------------------
query_pg() {
  local db_whitelist_json whitelist_len
  db_whitelist_json="$(dot_config_get 'context_pack_scan_db_whitelist')"
  whitelist_len="$(jq 'length' <<< "$db_whitelist_json")"

  if [[ "$whitelist_len" == "0" ]]; then
    # NT11: minimal declaration — pg_database catalog instead of a hardcoded DB count
    DB_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM pg_database WHERE datname NOT IN ('template0','template1','postgres')")"
    log_debug "db_count: pg_database catalog (NT11, whitelist empty) → ${DB_COUNT}"
  else
    DB_COUNT="$whitelist_len"
    log_debug "db_count: context_pack_scan_db_whitelist length → ${DB_COUNT}"
  fi

  # Counts from authoritative reference tables (read-only role cross-DB Phase 1.5 P10)
  LAW_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM normative_registry")"
  DOT_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM dot_tools")"
  ENTITY_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM birth_registry")"
  SPECIES_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM meta_catalog")"

  log_ok "query_pg: law=${LAW_COUNT} dot=${DOT_COUNT} entity=${ENTITY_COUNT} species=${SPECIES_COUNT} db=${DB_COUNT}"
}

-----------------------------------------------------------------------------

Bước 4 §6 — SCAN FS (dot_config.context_pack_scan_paths, mtime cache)

Cache: TSV per build — path\ttotal_files\tlatest_mtime_epoch

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Bước 4 §6 — SCAN FS: loop dot_config.context_pack_scan_paths, write an
# mtime cache (TSV per build: path<TAB>total_files<TAB>latest_mtime_epoch).
# Missing folders are skipped with a WARN, not fatal.
# -----------------------------------------------------------------------------
scan_fs() {
  local paths_json
  paths_json="$(dot_config_get 'context_pack_scan_paths')"

  local tmp_cache="${TMPDIR}/dcp-scan-${BUILD_ID:-nobuild}.tsv"
  : > "$tmp_cache"

  local total=0 skipped=0
  local path count newest
  while IFS= read -r path; do
    [[ -z "$path" ]] && continue
    if [[ ! -d "$path" ]]; then
      log_warn "scan_fs: folder missing → skip: ${path}"
      skipped=$((skipped + 1))
      continue
    fi
    count="$(find "$path" -maxdepth 3 -type f 2>/dev/null | wc -l)"
    # awk avoids SIGPIPE under pipefail (consumes all input, emits once)
    newest="$(find "$path" -maxdepth 3 -type f -printf '%T@\n' 2>/dev/null | awk '$1 > max {max=$1} END {print max+0}')"
    printf '%s\t%s\t%s\n' "$path" "$count" "$newest" >> "$tmp_cache"
    total=$((total + count))
    log_debug "scan_fs: ${path} total=${count} newest_mtime=${newest}"
  done < <(jq -r '.[]' <<< "$paths_json")

  SCANNED_FILE_COUNT="$total"
  log_ok "scan_fs: scanned=${SCANNED_FILE_COUNT} skipped=${skipped} cache=${tmp_cache}"
}

-----------------------------------------------------------------------------

§6 Bước 5 rev 5 — strip volatile header cho logical_checksum

markdown/mermaid: sed block-delete 2 delimiter lines + 4 key lines giữa

json: jq -S 'del(._volatile_header)' + canonicalize sort keys

-----------------------------------------------------------------------------

strip_volatile_header() { local file="$1" local fmt="$2" case "$fmt" in markdown|mermaid) sed '/<!-- VOLATILE HEADER -->/,/<!-- \/VOLATILE HEADER -->/d' "$file" ;; json) jq -S 'del(._volatile_header)' "$file" ;; *) cat "$file" ;; esac }

-----------------------------------------------------------------------------

Bước 5 §6 Đ43 rev 6 — GENERATE (generic dispatcher)

-----------------------------------------------------------------------------

NT2/NT4/rev 4 compliance: 0 case-dispatch per section, 0 hardcode template,

mọi section render qua /opt/incomex/dot/lib/cp-render-section.py.

Filter test: ENV var ONLY_SECTION='code' để chạy 1 section.

Output: ${OUTPUT_ROOT}.tmp/${BUILD_ID}/<output_filename> (ephemeral staging,

promote live ở Bước 7).

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Bước 5 §6 Đ43 rev 6 — GENERATE (generic dispatcher).
# NT2/NT4/rev 4: no per-section case-dispatch, no hardcoded template; every
# section renders via /opt/incomex/dot/lib/cp-render-section.py, driven by
# context_pack_section_definitions. ONLY_SECTION env var filters to one
# section for testing. Output: ${OUTPUT_ROOT}.tmp/${BUILD_ID}/<output_filename>
# (ephemeral staging, promoted live in Bước 7).
# Renderer exit 77 = "data_source not supported yet" → counted as skip.
# -----------------------------------------------------------------------------
generate() {
  if [[ -z "${BUILD_ID:-}" ]]; then
    log_fatal "generate: BUILD_ID unset (Bước 2 try_lock phải chạy trước)"
    exit 1
  fi

  local generated_at
  generated_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  local out_dir="${OUTPUT_ROOT}.tmp/${BUILD_ID}"
  mkdir -p "$out_dir"
  log_info "generate out_dir=${out_dir}"

  local where=""
  if [[ -n "${ONLY_SECTION:-}" ]]; then
    where=" AND code = '${ONLY_SECTION}'"
    log_info "generate ONLY_SECTION=${ONLY_SECTION} (single-section filter)"
  fi

  # Fetch all active section definitions as one JSON array (one round-trip)
  local sections
  sections="$(run_pg_rw "
    SELECT COALESCE(json_agg(json_build_object(
      'code', code,
      'format', format,
      'data_source', data_source,
      'target_db', COALESCE(target_db, ''),
      'template_kb_path', COALESCE(template_kb_path, ''),
      'query_kb_path', COALESCE(query_kb_path, ''),
      'render_config', render_config::text,
      'output_filename', output_filename,
      'min_size_bytes', min_size_bytes,
      'max_size_bytes', COALESCE(max_size_bytes, 2147483647)
    ) ORDER BY order_index), '[]'::json)
    FROM context_pack_section_definitions
    WHERE is_active = true${where}")"

  local total
  total="$(jq 'length' <<< "$sections")"
  if [[ "$total" -eq 0 ]]; then
    log_fatal "generate: 0 active section khớp filter (ONLY_SECTION=${ONLY_SECTION:-<none>})"
    exit 1
  fi
  log_info "generate: ${total} active section(s)"

  local render_log="/tmp/dcp-render-${BUILD_ID}.log"
  : > "$render_log"
  local cs_tsv="/tmp/dcp-cs-${BUILD_ID}.tsv"
  : > "$cs_tsv"

  local i=0 ok=0 warn=0 fail=0 skip=0
  while [[ $i -lt $total ]]; do
    local sec code fmt ds tdb tpl qry rc outname minb maxb size outfile
    sec="$(jq -c ".[$i]" <<< "$sections")"
    code="$(jq -r '.code' <<< "$sec")"
    fmt="$(jq -r '.format' <<< "$sec")"
    ds="$(jq -r '.data_source' <<< "$sec")"
    tdb="$(jq -r '.target_db' <<< "$sec")"
    tpl="$(jq -r '.template_kb_path' <<< "$sec")"
    qry="$(jq -r '.query_kb_path' <<< "$sec")"
    rc="$(jq -r '.render_config' <<< "$sec")"
    outname="$(jq -r '.output_filename' <<< "$sec")"
    minb="$(jq -r '.min_size_bytes' <<< "$sec")"
    maxb="$(jq -r '.max_size_bytes' <<< "$sec")"
    outfile="${out_dir}/${outname}"

    log_info "generate[${code}] → ${outname}"
    # Temporarily disable set -e while calling the renderer so we can capture
    # per-section failure instead of aborting the whole build
    set +e
    size="$(python3 /opt/incomex/dot/lib/cp-render-section.py \
      --code="$code" \
      --format="$fmt" \
      --data-source="$ds" \
      --target-db="$tdb" \
      --template-path="$tpl" \
      --query-path="$qry" \
      --render-config="$rc" \
      --output-file="$outfile" \
      --generated-at="$generated_at" \
      --build-id="$BUILD_ID" \
      --git-commit="$GIT_COMMIT" \
      --trigger-source="$TRIGGER_SOURCE" \
      2>>"$render_log")"
    local rc_code=$?
    set -e

    if [[ $rc_code -eq 77 ]]; then
      log_warn "generate[${code}] SKIP data_source chưa support (exit 77)"
      skip=$((skip + 1))
    elif [[ $rc_code -ne 0 ]]; then
      log_err "generate[${code}] FAIL rc=${rc_code} (log: ${render_log})"
      fail=$((fail + 1))
    else
      # §6 Bước 5 — 2 checksums + line_count per section
      local logical_cs file_cs line_count
      logical_cs="$(strip_volatile_header "$outfile" "$fmt" | sha256sum | awk '{print $1}')"
      file_cs="$(sha256sum "$outfile" | awk '{print $1}')"
      line_count="$(wc -l < "$outfile")"
      printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' "$code" "$outname" "$fmt" "$size" "$line_count" "$logical_cs" "$file_cs" >> "$cs_tsv"

      # Size bounds are WARN-only here; hard validation happens in Bước 6
      if [[ "$size" -lt "$minb" ]]; then
        log_warn "generate[${code}] size=${size} < min_size_bytes=${minb} logical=${logical_cs:0:12}"
        warn=$((warn + 1))
        ok=$((ok + 1))
      elif [[ "$size" -gt "$maxb" ]]; then
        log_warn "generate[${code}] size=${size} > max_size_bytes=${maxb} logical=${logical_cs:0:12}"
        warn=$((warn + 1))
        ok=$((ok + 1))
      else
        log_ok "generate[${code}] size=${size} logical=${logical_cs:0:12} file=${file_cs:0:12}"
        ok=$((ok + 1))
      fi
    fi
    i=$((i + 1))
  done

  # Finalize checksum manifest (§5.4 manifest prep, persisted in Bước 7).
  # Enriched with trigger_source, git_commit and counts so the 7g repair
  # replay does not need to re-query.
  local manifest="${out_dir}/.checksums.json"
  jq -sR \
    --arg bid "$BUILD_ID" \
    --arg gat "$generated_at" \
    --arg ts "$TRIGGER_SOURCE" \
    --arg gc "$GIT_COMMIT" \
    --argjson law "${LAW_COUNT:-0}" \
    --argjson dot "${DOT_COUNT:-0}" \
    --argjson ent "${ENTITY_COUNT:-0}" \
    --argjson sp "${SPECIES_COUNT:-0}" \
    --argjson db "${DB_COUNT:-0}" '
      split("\n") | map(select(length>0)) | map(split("\t")) |
      {build_id: $bid, generated_at: $gat, trigger_source: $ts, git_commit: $gc,
       counts: {law: $law, dot: $dot, entity: $ent, species: $sp, db: $db},
       sections: map({code:.[0], output_filename:.[1], format:.[2],
                      size:(.[3]|tonumber), line_count:(.[4]|tonumber),
                      logical_sha256:.[5], file_sha256:.[6]})}
    ' "$cs_tsv" > "$manifest"
  log_info "checksum manifest: ${manifest} ($(jq '.sections|length' "$manifest") sections)"

  log_info "generate summary: ok=${ok} warn=${warn} skip=${skip} fail=${fail} total=${total}"
  if [[ $fail -gt 0 ]]; then
    log_fatal "generate: ${fail}/${total} section failed — xem ${render_log}"
    exit 1
  fi
  log_ok "generate DONE out=${out_dir}"
}

-----------------------------------------------------------------------------

Bước 6 §6 Đ43 — VALIDATE (size + format check, per section)

-----------------------------------------------------------------------------

Size: file_size ∈ [min_size_bytes, max_size_bytes] từ section_definitions

Format: markdown/mermaid → volatile header block đầy đủ (open + 4 key + close)

json → jq parse OK + _volatile_header key tồn tại

mermaid syntax → mmdc binary nếu có (else skip — soft check)

Exit 1 nếu fail >0. WARN không block.

-----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# Bước 6 §6 Đ43 — VALIDATE (size + format check, per section).
#   Size:   file_size ∈ [min_size_bytes, max_size_bytes] (WARN only)
#   Format: markdown/mermaid → full volatile header block (delimiters + 4 keys)
#           json → jq parse OK + _volatile_header with the 4 required keys
#           mermaid extra → mmdc parse if the binary is present (soft check)
# Exits 1 if any hard failure; WARNs do not block.
# -----------------------------------------------------------------------------
validate() {
  local out_dir="${OUTPUT_ROOT}.tmp/${BUILD_ID}"
  if [[ ! -d "$out_dir" ]]; then
    log_fatal "validate: out_dir không tồn tại: ${out_dir}"
    exit 1
  fi

  local sections
  sections="$(run_pg_rw "
    SELECT COALESCE(json_agg(json_build_object(
      'code', code,
      'format', format,
      'output_filename', output_filename,
      'min_size_bytes', min_size_bytes,
      'max_size_bytes', COALESCE(max_size_bytes, 2147483647)
    ) ORDER BY order_index), '[]'::json)
    FROM context_pack_section_definitions
    WHERE is_active = true")"

  local total
  total="$(jq 'length' <<< "$sections")"
  log_info "validate: ${total} active section(s)"

  local has_mmdc=0
  command -v mmdc >/dev/null 2>&1 && has_mmdc=1

  local i=0 ok=0 warn=0 fail=0
  while [[ $i -lt $total ]]; do
    local sec code fmt outname minb maxb outfile size
    sec="$(jq -c ".[$i]" <<< "$sections")"
    code="$(jq -r '.code' <<< "$sec")"
    fmt="$(jq -r '.format' <<< "$sec")"
    outname="$(jq -r '.output_filename' <<< "$sec")"
    minb="$(jq -r '.min_size_bytes' <<< "$sec")"
    maxb="$(jq -r '.max_size_bytes' <<< "$sec")"
    outfile="${out_dir}/${outname}"

    # V1: file existence
    if [[ ! -f "$outfile" ]]; then
      log_err "validate[${code}] file missing: ${outfile}"
      fail=$((fail + 1))
      i=$((i + 1)); continue
    fi
    size="$(wc -c < "$outfile")"

    # V2: size bounds (WARN only)
    local size_status="OK"
    if [[ "$size" -lt "$minb" ]]; then
      log_warn "validate[${code}] size=${size} < min=${minb}"
      size_status="UNDER"
      warn=$((warn + 1))
    elif [[ "$size" -gt "$maxb" ]]; then
      log_warn "validate[${code}] size=${size} > max=${maxb}"
      size_status="OVER"
      warn=$((warn + 1))
    fi

    # V3: format check
    local fmt_status="OK"
    case "$fmt" in
      markdown|mermaid)
        if ! grep -q '^<!-- VOLATILE HEADER -->$' "$outfile" \
           || ! grep -q '^<!-- /VOLATILE HEADER -->$' "$outfile"; then
          log_err "validate[${code}] missing volatile header delimiters (md/mermaid)"
          fmt_status="FAIL"
          fail=$((fail + 1))
        else
          # Đ43 §6 Bước 5 rev 5 — 4 required common runtime fields.
          # Đ43 audit R2 Fix B — grep only inside the header block (between the
          # two delimiter lines) to avoid a false pass when the body happens to
          # contain a same-named key.
          local vh_block
          vh_block="$(sed -n '/^<!-- VOLATILE HEADER -->$/,/^<!-- \/VOLATILE HEADER -->$/p' "$outfile")"
          local missing_keys=""
          local vh_key
          for vh_key in generated_at build_id git_commit trigger_source; do
            grep -qE "^${vh_key}:[[:space:]]+[^[:space:]]" <<<"$vh_block" \
              || missing_keys="${missing_keys} ${vh_key}"
          done
          if [[ -n "$missing_keys" ]]; then
            log_err "validate[${code}] volatile header thiếu 4-field bắt buộc (trong header block):${missing_keys}"
            fmt_status="FAIL"
            fail=$((fail + 1))
          fi
        fi
        # mermaid extra: if mmdc is installed, try a real parse
        if [[ "$fmt" == "mermaid" && $has_mmdc -eq 1 ]]; then
          if ! mmdc -i "$outfile" -o "/tmp/dcp-mmd-probe-${BUILD_ID}.svg" >/dev/null 2>&1; then
            log_err "validate[${code}] mmdc parse fail"
            fmt_status="FAIL"
            fail=$((fail + 1))
          fi
          rm -f "/tmp/dcp-mmd-probe-${BUILD_ID}.svg"
        fi
        ;;
      json)
        if ! jq empty "$outfile" >/dev/null 2>&1; then
          log_err "validate[${code}] jq parse fail"
          fmt_status="FAIL"
          fail=$((fail + 1))
        elif ! jq -e 'has("_volatile_header")' "$outfile" >/dev/null 2>&1; then
          log_err "validate[${code}] _volatile_header key missing"
          fmt_status="FAIL"
          fail=$((fail + 1))
        elif ! jq -e '._volatile_header | has("generated_at") and has("build_id") and has("git_commit") and has("trigger_source")' "$outfile" >/dev/null 2>&1; then
          log_err "validate[${code}] _volatile_header thiếu 4-field bắt buộc (generated_at/build_id/git_commit/trigger_source)"
          fmt_status="FAIL"
          fail=$((fail + 1))
        fi
        ;;
      *)
        log_warn "validate[${code}] unknown format=${fmt}"
        fmt_status="UNKNOWN"
        ;;
    esac

    if [[ "$size_status" == "OK" && "$fmt_status" == "OK" ]]; then
      log_ok "validate[${code}] size=${size} (${minb}≤x≤${maxb}) format=${fmt} PASS"
      ok=$((ok + 1))
    fi
    i=$((i + 1))
  done

  log_info "validate summary: ok=${ok} warn=${warn} fail=${fail} total=${total}"
  if [[ $fail -gt 0 ]]; then
    log_fatal "validate: ${fail}/${total} section failed"
    exit 1
  fi
  log_ok "validate DONE"
}

# =============================================================================
# Bước 7 §6 Đ43 — 2-PHASE PUBLISH (7a-7g)
#
# Sub-step map:
#   7a COPY     build_dir → staging_dir
#   7b VALIDATE staging file_sha256 match manifest (no drift)
#   7c SWAP SYMLINK OUTPUT_ROOT/current → staging/BUILD_ID (atomic via mv -T)
#   7d VERIFY   readlink matches expected staging path
#   7e KB MIRROR upload 8 section lên Agent Data API path context-pack/
#   7f PG INSERT context_pack_manifest + 8 context_pack_sections (1 TX Đ35 §5.1)
#   7g REPAIR   detect post_fs_pre_db state → finalize hoặc rollback
#
# Pattern symlink: OUTPUT_ROOT là dir owned by incomex → OUTPUT_ROOT/current
# symlink (tránh cần write trên /opt/incomex/ parent root-owned).
# Atomic rename: mv -Tf trên cùng FS.
# =============================================================================

# Bước 7 — two-phase publish orchestrator: FS swap first (7a-7d), then the
# KB mirror (7e) and the DB insert (7f). publish_db sets the MANIFEST_ID
# global consumed later by release().
publish() {
  publish_fs   # 7a-7d

  # DEBUG HOOK (test-only): stop right after 7d to simulate a crash before
  # the KB/DB phases; the next run is expected to use repair mode (7g).
  if [[ "${DOT_TEST_STOP_AFTER:-}" == "7d" ]]; then
    log_warn "DOT_TEST_STOP_AFTER=7d → exit after 7d (test hook, repair mode expected next)"
    exit 0
  fi

  publish_kb   # 7e
  publish_db   # 7f (sets MANIFEST_ID global)
}

# -----------------------------------------------------------------------------
# Bước 7a-7d — FS-only publish (staging → live symlink)
# -----------------------------------------------------------------------------

# Bước 7a-7d — FS-only publish: copy the build dir into staging, verify the
# staged files against the build manifest, atomically swap the live symlink,
# then verify the swap took effect.
publish_fs() {
  local src_dir="${OUTPUT_ROOT}.tmp/${BUILD_ID}"
  local staging_dir="${OUTPUT_ROOT}-staging/${BUILD_ID}"
  local live_link="${OUTPUT_ROOT}/current"

  if [[ ! -d "$src_dir" ]]; then
    log_fatal "publish 7a: build dir missing: ${src_dir}"
    exit 1
  fi

  # ---------------------------------------------------------------------------
  # 7a COPY build → staging
  # ---------------------------------------------------------------------------
  log_info "7a COPY ${src_dir}/ → ${staging_dir}/"
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "rsync -a ${src_dir}/ ${staging_dir}/ (skip FS write)"
  else
    mkdir -p "$staging_dir"
    rsync -a "${src_dir}/" "${staging_dir}/"
    local n_copied
    n_copied="$(find "$staging_dir" -maxdepth 1 -type f 2>/dev/null | wc -l)"
    log_ok "7a COPY done (${n_copied} files in staging)"
  fi

  # ---------------------------------------------------------------------------
  # 7b VALIDATE staging matches manifest (no drift)
  # ---------------------------------------------------------------------------
  log_info "7b VALIDATE staging checksums"
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "would recompute sha256 cho từng staging file, so với .checksums.json"
  else
    local manifest="${staging_dir}/.checksums.json"
    if [[ ! -f "$manifest" ]]; then
      log_fatal "7b manifest missing: ${manifest}"
      exit 1
    fi
    local mismatches=0 total=0
    local section_json code outname expected actual
    while IFS= read -r section_json; do
      total=$((total + 1))
      code="$(jq -r '.code' <<< "$section_json")"
      outname="$(jq -r '.output_filename' <<< "$section_json")"
      expected="$(jq -r '.file_sha256' <<< "$section_json")"
      if [[ ! -f "${staging_dir}/${outname}" ]]; then
        log_err "7b staging missing file: ${outname}"
        mismatches=$((mismatches + 1))
        continue
      fi
      actual="$(sha256sum "${staging_dir}/${outname}" | awk '{print $1}')"
      if [[ "$actual" != "$expected" ]]; then
        log_err "7b drift ${code}: manifest=${expected:0:12} staging=${actual:0:12}"
        mismatches=$((mismatches + 1))
      fi
    done < <(jq -c '.sections[]' "$manifest")
    if [[ $mismatches -gt 0 ]]; then
      log_fatal "7b staging drift: ${mismatches}/${total} file(s) mismatch (§5.4 file_checksum)"
      exit 1
    fi
    log_ok "7b staging verified: ${total}/${total} file_sha256 match manifest"
  fi

  # ---------------------------------------------------------------------------
  # 7c SWAP live symlink (atomic mv -T on the same filesystem)
  # ---------------------------------------------------------------------------
  log_info "7c SWAP ${live_link} → ${staging_dir}"
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "ln -sfn ${staging_dir} ${live_link}.new && mv -Tf ${live_link}.new ${live_link}"
  else
    if [[ ! -d "${OUTPUT_ROOT}" ]]; then
      log_fatal "7c OUTPUT_ROOT missing: ${OUTPUT_ROOT}"
      exit 1
    fi
    # Create a throwaway link then rename over the live one: readers never
    # observe a missing symlink.
    local tmp_link="${live_link}.new.$$"
    ln -sfn "$staging_dir" "$tmp_link"
    mv -Tf "$tmp_link" "$live_link"
    log_ok "7c symlink swapped (atomic rename via mv -T)"
  fi

  # ---------------------------------------------------------------------------
  # 7d VERIFY symlink target
  # ---------------------------------------------------------------------------
  log_info "7d VERIFY symlink target"
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "would readlink ${live_link} và compare to ${staging_dir}"
  else
    if [[ ! -L "$live_link" ]]; then
      log_fatal "7d ${live_link} không phải symlink"
      exit 1
    fi
    local actual_target
    actual_target="$(readlink "$live_link")"
    if [[ "$actual_target" != "$staging_dir" ]]; then
      log_fatal "7d symlink wrong target: expected=${staging_dir} actual=${actual_target}"
      exit 1
    fi
    # Probe one known section file through the link; unreadable is a WARN,
    # not fatal (the link itself already verified).
    local probe="${live_link}/PROJECT_MAP.md"
    if [[ ! -r "$probe" ]]; then
      log_warn "7d probe file unreadable qua symlink: ${probe}"
    fi
    log_ok "7d symlink verified: ${live_link} -> ${actual_target}"
  fi
}

# -----------------------------------------------------------------------------
# Bước 7e — KB MIRROR: upload 8 section lên Agent Data API
# Path: context-pack/<BUILD_ID>/<output_filename>.
# Storage key: context-pack__BID__name
# -----------------------------------------------------------------------------

# Bước 7e — KB MIRROR: upload every section listed in the staging manifest to
# the Agent Data API (document path context-pack/<BUILD_ID>/<output_filename>).
# Sets KB_MIRROR_OK=1 on full success, exits 1 if any upload fails.
# Fix: quote the /tmp response-file expansions (SC2086 — BUILD_ID/code are
# interpolated into paths) and restore the line continuations lost upstream.
publish_kb() {
  local staging_dir="${OUTPUT_ROOT}-staging/${BUILD_ID}"
  local manifest_file="${staging_dir}/.checksums.json"

  log_info "7e KB MIRROR upload to ${AGENT_DATA_URL}/documents"
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "would POST 8 section → context-pack/${BUILD_ID}/*"
    KB_MIRROR_OK=1
    return 0
  fi
  if [[ ! -f "$manifest_file" ]]; then
    log_fatal "7e manifest file missing: ${manifest_file}"
    exit 1
  fi

  local ok=0 fail=0
  local section_json code outname fmt body mime doc_id resp http_code resp_file
  while IFS= read -r section_json; do
    code="$(jq -r '.code' <<< "$section_json")"
    outname="$(jq -r '.output_filename' <<< "$section_json")"
    fmt="$(jq -r '.format' <<< "$section_json")"
    # Mime by declared format (normalized output; not inferred from the file
    # extension, to avoid drift)
    case "$fmt" in
      markdown) mime="text/markdown" ;;
      json)     mime="application/json" ;;
      mermaid)  mime="text/plain" ;;
      *)        mime="text/plain" ;;
    esac
    doc_id="context-pack/${BUILD_ID}/${outname}"
    body="$(cat "${staging_dir}/${outname}")"
    resp_file="/tmp/dcp-kb-${BUILD_ID}-${code}.resp"
    # Build JSON payload via jq (escape-safe)
    local payload
    payload="$(jq -n \
      --arg did "$doc_id" \
      --arg mime "$mime" \
      --arg body "$body" \
      --arg title "Đ43 context-pack ${BUILD_ID} section=${code}" \
      --arg src "dieu43_context_pack_publish" \
      --arg bid "$BUILD_ID" \
      '{document_id:$did, parent_id:"root", content:{mime_type:$mime, body:$body}, metadata:{title:$title, tags:["dieu43","context-pack","build"], source:$src, build_id:$bid}}')"
    resp="$(curl -sS -o "$resp_file" -w '%{http_code}' \
      --max-time 30 \
      -X POST \
      -H "Content-Type: application/json" \
      -H "X-API-Key: ${AGENT_DATA_API_KEY}" \
      --data-binary @- \
      "${AGENT_DATA_URL}/documents?upsert=true" <<< "$payload" 2>/dev/null)"
    http_code="$resp"
    if [[ "$http_code" == "200" || "$http_code" == "201" ]]; then
      log_ok "7e uploaded ${code} → ${doc_id} HTTP=${http_code}"
      ok=$((ok + 1))
      rm -f "$resp_file"
    else
      log_err "7e upload FAIL ${code} → ${doc_id} HTTP=${http_code} body=$(head -c 200 "$resp_file" 2>/dev/null)"
      fail=$((fail + 1))
      # Response file intentionally kept on failure for post-mortem.
    fi
  done < <(jq -c '.sections[]' "$manifest_file")

  log_info "7e KB mirror summary: ok=${ok} fail=${fail}"
  if [[ $fail -gt 0 ]]; then
    KB_MIRROR_OK=0
    log_fatal "7e KB mirror: ${fail} section fail"
    exit 1
  fi
  KB_MIRROR_OK=1
  log_ok "7e KB mirror DONE (${ok}/8 sections live)"
}

# -----------------------------------------------------------------------------
# Bước 7f — PG INSERT context_pack_manifest + 8 context_pack_sections (1 TX)
# Đ35 §5.1 CẤM partial: manifest + 8 sections trong cùng transaction.
# Sets MANIFEST_ID global cho release().
# -----------------------------------------------------------------------------

# Bước 7f — INSERT context_pack_manifest + its sections in ONE statement
# (data-modifying CTE → implicit single transaction, Đ35 §5.1 no-partial rule).
# Reads everything from the staging .checksums.json; sets the MANIFEST_ID
# global for release().
# Fixes: (1) `[[ … ]] && kb_status=failed` returns non-zero when the guard is
# false and would abort the script under `set -e` — replaced with plain `if`;
# (2) psql exit status is captured via `|| rc=$?` so the error branch is
# actually reachable under `set -e`; (3) UUOC `cat | tail` removed.
publish_db() {
  local staging_dir="${OUTPUT_ROOT}-staging/${BUILD_ID}"
  local manifest_file="${staging_dir}/.checksums.json"

  log_info "7f PG INSERT manifest + sections (1 TX)"
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "would INSERT context_pack_manifest + 8 context_pack_sections + capture manifest_id"
    MANIFEST_ID="dry-run"
    return 0
  fi
  if [[ ! -f "$manifest_file" ]]; then
    log_fatal "7f manifest file missing: ${manifest_file}"
    exit 1
  fi

  # Aggregate checksums. The manifest JSON preserves the ordering produced by
  # section_definitions ORDER BY order_index, so section order is stable.
  local agg_logical agg_file total_size sec_count
  agg_logical="$(jq -r '.sections | map(.logical_sha256) | join("")' "$manifest_file" | sha256sum | awk '{print $1}')"
  agg_file="$(jq -r '.sections | map(.file_sha256) | join("")' "$manifest_file" | sha256sum | awk '{print $1}')"
  total_size="$(jq '[.sections[].size] | add' "$manifest_file")"
  sec_count="$(jq '.sections | length' "$manifest_file")"

  local generated_at git_commit trigger_source
  local law_count dot_count entity_count species_count db_count
  generated_at="$(jq -r '.generated_at' "$manifest_file")"
  git_commit="$(jq -r '.git_commit' "$manifest_file")"
  trigger_source="$(jq -r '.trigger_source' "$manifest_file")"
  law_count="$(jq -r '.counts.law' "$manifest_file")"
  dot_count="$(jq -r '.counts.dot' "$manifest_file")"
  entity_count="$(jq -r '.counts.entity' "$manifest_file")"
  species_count="$(jq -r '.counts.species' "$manifest_file")"
  db_count="$(jq -r '.counts.db' "$manifest_file")"

  local kb_status="live"
  if [[ "${KB_MIRROR_OK:-0}" -ne 1 ]]; then
    kb_status="failed"
  fi
  local publish_status="live"   # 7c-7d already PASSed if we got here
  local health_status="healthy" # validate PASSed if we got here

  # Build VALUES tuples for the sections:
  # s(section_code, file_path, kb_document_path, size, line_count, logical, file_checksum)
  # @json yields a double-quoted JSON string; gsub swaps the double quotes for
  # single quotes (\u0027) to form SQL string literals.
  # NOTE(review): a value containing a literal single quote would break the
  # generated SQL — assumed impossible for checksums/controlled codes; confirm.
  local sections_values
  sections_values="$(jq -r \
    --arg sdir "$staging_dir" \
    --arg bid "$BUILD_ID" '
    .sections
    | map(
        "(" + (.code | @json | gsub("\""; "\u0027")) + ", "
            + (($sdir + "/" + .output_filename) | @json | gsub("\""; "\u0027")) + ", "
            + (("context-pack__" + $bid + "__" + .output_filename) | @json | gsub("\""; "\u0027")) + ", "
            + (.size | tostring) + ", "
            + (.line_count | tostring) + ", "
            + (.logical_sha256 | @json | gsub("\""; "\u0027")) + ", "
            + (.file_sha256 | @json | gsub("\""; "\u0027")) + ")"
      )
    | join(",\n    ")
  ' "$manifest_file")"

  # Escape single quotes in git_commit (unlikely but safe)
  local git_commit_sql=${git_commit//\'/\'\'}

  # CTE-based single statement (implicit TX), RETURNING manifest_id
  local sql
  sql="$(cat <<SQL
WITH new_manifest AS (
  INSERT INTO context_pack_manifest(
    generated_at, published_at, git_commit, trigger_source,
    law_count, dot_count, entity_count, species_count, db_count,
    total_size_bytes, section_count,
    logical_checksum_sha256, file_checksum_sha256,
    publish_status, kb_mirror_status, publish_step, health_status, _dot_origin
  ) VALUES (
    '${generated_at}', now(), '${git_commit_sql}', '${trigger_source}',
    ${law_count}, ${dot_count}, ${entity_count}, ${species_count}, ${db_count},
    ${total_size}, ${sec_count},
    '${agg_logical}', '${agg_file}',
    '${publish_status}', '${kb_status}', 'done', '${health_status}',
    'dieu43_v1_2_rev6_publish'
  ) RETURNING id
),
inserted_sections AS (
  INSERT INTO context_pack_sections(
    manifest_id, section_code, file_path, kb_document_path,
    size_bytes, line_count, logical_checksum_sha256, file_checksum_sha256
  )
  SELECT nm.id, s.section_code, s.file_path, s.kb_document_path,
         s.size_bytes, s.line_count, s.logical_checksum_sha256, s.file_checksum_sha256
  FROM new_manifest nm
  CROSS JOIN (VALUES
    ${sections_values}
  ) AS s(section_code, file_path, kb_document_path, size_bytes, line_count,
         logical_checksum_sha256, file_checksum_sha256)
  RETURNING id
)
SELECT id FROM new_manifest;
SQL
)"

  local err_log="/tmp/dcp-7f-${BUILD_ID}.err"
  local out rc=0
  out="$(PGPASSWORD="$PG_PASSWORD_RW" LC_ALL=C.UTF-8 LANG=C.UTF-8 \
    psql -h "$PGHOST" -p "$PGPORT" \
      -U "$PG_USER_RW" -d "$PG_DB_MAIN" -tAXq -v ON_ERROR_STOP=1 \
      -c "$sql" 2>"$err_log")" || rc=$?
  if [[ $rc -ne 0 ]]; then
    log_fatal "7f psql failed (rc=${rc}): $(tail -n 5 "$err_log")"
    exit 1
  fi
  MANIFEST_ID="$(tr -d '[:space:]' <<< "$out")"
  if ! [[ "$MANIFEST_ID" =~ ^[0-9]+$ ]]; then
    log_fatal "7f unexpected psql output stdout='${out}' stderr='$(cat "$err_log")'"
    exit 1
  fi
  rm -f "$err_log"
  log_ok "7f INSERT DONE manifest_id=${MANIFEST_ID} sections=${sec_count} (1 TX via CTE)"
}

# -----------------------------------------------------------------------------
# Bước 8 §6 Đ43 — RELEASE (minimal, retry logic ở P4c)
# UPDATE context_pack_requests status='done', manifest_id, finished_at.
# -----------------------------------------------------------------------------

# Bước 8 — RELEASE: mark the request row done and attach the manifest id.
# Requires REQUEST_ID and MANIFEST_ID globals (set by try_lock / publish_db).
release() {
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "would UPDATE request status=done, manifest_id=${MANIFEST_ID:-?}, finished_at=now()"
    return 0
  fi
  # No request row to update → nothing to do (warn only).
  if [[ -z "${REQUEST_ID:-}" || "$REQUEST_ID" == "dry-run" ]]; then
    log_warn "release: REQUEST_ID unset — skip UPDATE"
    return 0
  fi
  # Missing MANIFEST_ID here means 7f never completed → hard stop.
  if [[ -z "${MANIFEST_ID:-}" || "$MANIFEST_ID" == "dry-run" ]]; then
    log_fatal "release: MANIFEST_ID unset — Bước 7f failed?"
    exit 1
  fi
  run_pg_rw "UPDATE context_pack_requests SET status='done', manifest_id=${MANIFEST_ID}, finished_at=now(), last_error=NULL WHERE id=${REQUEST_ID}" >/dev/null
  log_ok "release DONE request_id=${REQUEST_ID} manifest_id=${MANIFEST_ID} status=done"
}

# -----------------------------------------------------------------------------
# Bước 8 §6 Đ43 — RELEASE FAILURE path (retry hoặc failed)
# Driven by dot_config.context_pack_retry_policy = {max_retries, backoff_seconds[]}.
# Được gọi từ on_exit trap khi build fail giữa chừng (chưa có MANIFEST_ID).
# -----------------------------------------------------------------------------

# Bước 8 failure path — reschedule the request (status=pending + next_retry_at
# backoff) or mark it permanently failed once max_retries is exhausted.
# Policy comes from dot_config.context_pack_retry_policy.
# $1 — error message (defaults to "unknown").
# Fix: the `[[ … ]] && assign` short-circuits (retry-count default, error
# truncation) return non-zero when the guard is false, which would abort the
# script under `set -e` — rewritten as plain `if` statements.
release_failure() {
  local err_msg="${1:-unknown}"
  if [[ "${DRY_RUN:-0}" -eq 1 ]]; then
    log_dry "would release_failure err='${err_msg}'"
    return 0
  fi
  # No request row → nothing to record.
  if [[ -z "${REQUEST_ID:-}" || "$REQUEST_ID" == "dry-run" ]]; then
    return 0
  fi

  local policy max_retries current_retry new_retry
  policy="$(dot_config_get 'context_pack_retry_policy')"
  max_retries="$(jq -r '.max_retries' <<< "$policy")"

  current_retry="$(run_pg_rw "SELECT retry_count FROM context_pack_requests WHERE id=${REQUEST_ID}")"
  if [[ -z "$current_retry" ]]; then
    current_retry=0
  fi
  new_retry=$((current_retry + 1))

  # Escape single quotes in the error message for SQL
  local err_sql=${err_msg//\'/\'\'}

  # Truncate long errors
  if [[ ${#err_sql} -gt 500 ]]; then
    err_sql="${err_sql:0:500}..."
  fi

  local new_status next_at_sql
  # §5.9 seed: max_retries=3 + backoff_seconds with 3 entries → 3 retries total.
  # new_retry is the upcoming attempt (1..max_retries = retry slot,
  # >max_retries → failed permanently).
  if [[ $new_retry -le $max_retries ]]; then
    local idx backoff_s
    # backoff_seconds[new_retry - 1] (1st retry = index 0, 2nd = 1, 3rd = 2);
    # fall back to the last entry, then to 300s, if the array is short.
    idx=$((new_retry - 1))
    backoff_s="$(jq -r ".backoff_seconds[${idx}] // .backoff_seconds[-1] // 300" <<< "$policy")"
    next_at_sql="now() + interval '${backoff_s} seconds'"
    new_status="pending"
    log_warn "release_failure: retry ${new_retry}/${max_retries} scheduled +${backoff_s}s — err='${err_msg}'"
  else
    next_at_sql="NULL"
    new_status="failed"
    log_err "release_failure: max_retries hit (${new_retry}>=${max_retries}) → status=failed — err='${err_msg}'"
    log_issue "CRITICAL" "context_pack_build_max_retries" "Build failed after ${new_retry} attempts: ${err_msg}"
  fi

  run_pg_rw "UPDATE context_pack_requests SET status='${new_status}', retry_count=${new_retry}, next_retry_at=${next_at_sql}, last_error='${err_sql}', finished_at=now() WHERE id=${REQUEST_ID}" >/dev/null
  log_ok "release_failure DONE request_id=${REQUEST_ID} status=${new_status} retry_count=${new_retry}"
}

# -----------------------------------------------------------------------------
# Bước 7g — REPAIR mode §6 Đ43
# Detect state qua current symlink + PG request.manifest_id:
#   (A) symlink + request running + manifest_id=NULL → state post_fs_pre_db → FINALIZE
#   (B) symlink + request done + manifest_id set     → nothing to do
#   (C) request failed                               → ROLLBACK (Phase 5)
# -----------------------------------------------------------------------------

# Bước 7g — REPAIR: inspect the live symlink plus the matching
# context_pack_requests row and either finish an interrupted publish
# (replay 7e/7f/release) or close out a publish that only missed release().
repair_publish() {
  # Đ43 audit R2 Fix A — repair does NOT bypass infra verification.
  # precheck() itself performs env_load + PG health + OUTPUT_ROOT from
  # dot_config + folder/operation checks.
  precheck
  log_ok "repair: precheck PASS (env + PG + OUTPUT_ROOT + folders + dot_operations)"

  local live_link="${OUTPUT_ROOT}/current"
  if [[ ! -L "$live_link" ]]; then
    log_fatal "repair: ${live_link} không phải symlink — không có state để repair"
    exit 1
  fi
  local target build_id_live
  target="$(readlink "$live_link")"
  build_id_live="$(basename "$target")"
  log_info "repair: symlink → ${target} (build_id=${build_id_live})"

  if [[ ! -d "$target" ]]; then
    log_fatal "repair: dangling symlink, target missing: ${target}"
    exit 1
  fi
  local manifest_file="${target}/.checksums.json"
  if [[ ! -f "$manifest_file" ]]; then
    log_fatal "repair: không có .checksums.json trong target"
    exit 1
  fi

  # Đ43 audit R2 Fix A — manifest integrity gate: size > 0 + JSON parses.
  # On failure → FATAL + exit; never call release() with a corrupt manifest.
  if [[ ! -s "$manifest_file" ]]; then
    log_fatal "repair: manifest integrity FAIL — ${manifest_file} empty (size=0)"
    exit 1
  fi
  if ! jq empty "$manifest_file" >/dev/null 2>&1; then
    log_fatal "repair: manifest integrity FAIL — ${manifest_file} not valid JSON (jq parse fail)"
    exit 1
  fi
  log_ok "repair: manifest integrity OK (size>0 + jq parse PASS)"

  # Look up the newest request row for this build_id.
  local pg_row req_id req_status req_mid
  pg_row="$(run_pg_rw "SELECT id || '|' || status || '|' || COALESCE(manifest_id::text, '') FROM context_pack_requests WHERE detail->>'build_id' = '${build_id_live}' ORDER BY id DESC LIMIT 1")"
  if [[ -z "$pg_row" ]]; then
    log_fatal "repair: không tìm thấy context_pack_requests cho build_id=${build_id_live}"
    exit 1
  fi
  IFS='|' read -r req_id req_status req_mid <<< "$pg_row"
  log_info "repair: request id=${req_id} status=${req_status} manifest_id=${req_mid:-NULL}"

  # State (B): everything already landed — no work.
  if [[ "$req_status" == "done" && -n "$req_mid" ]]; then
    log_ok "repair: already complete (manifest_id=${req_mid}) — nothing to do"
    return 0
  fi
  # State (C): rollback path deferred to Phase 5.
  if [[ "$req_status" == "failed" ]]; then
    log_warn "repair: request=failed — ROLLBACK path (Phase 5 sẽ implement); exit"
    exit 0
  fi

  # Gap 7f→8: 7f already INSERTed the manifest but release() never ran —
  # just close the request out.
  if [[ "$req_status" == "running" && -n "$req_mid" ]]; then
    log_info "repair: state=post_db_pre_release (manifest_id=${req_mid} có, status vẫn running) → close release"
    MANIFEST_ID="$req_mid"
    REQUEST_ID="$req_id"
    release
    log_ok "repair CLOSE DONE manifest_id=${MANIFEST_ID} request_id=${REQUEST_ID}"
    return 0
  fi
  # Anything other than (running, manifest_id NULL) at this point is a state
  # the repair logic does not understand.
  if [[ "$req_status" != "running" || -n "$req_mid" ]]; then
    log_fatal "repair: ambiguous state req_status=${req_status} manifest_id=${req_mid}"
    exit 1
  fi

  # State (A) FINALIZE: post_fs_pre_db → rehydrate globals from the manifest,
  # then replay 7e + 7f + release.
  log_info "repair: state=post_fs_pre_db → FINALIZE (run 7e + 7f + release)"
  REQUEST_ID="$req_id"
  BUILD_ID="$build_id_live"
  TRIGGER_SOURCE="$(jq -r '.trigger_source' "$manifest_file")"
  GIT_COMMIT="$(jq -r '.git_commit' "$manifest_file")"
  LAW_COUNT="$(jq -r '.counts.law' "$manifest_file")"
  DOT_COUNT="$(jq -r '.counts.dot' "$manifest_file")"
  ENTITY_COUNT="$(jq -r '.counts.entity' "$manifest_file")"
  SPECIES_COUNT="$(jq -r '.counts.species' "$manifest_file")"
  DB_COUNT="$(jq -r '.counts.db' "$manifest_file")"

  publish_kb
  publish_db
  release
  log_ok "repair FINALIZE DONE manifest_id=${MANIFEST_ID} request_id=${REQUEST_ID}"
}

# =============================================================================
# Main flow
# =============================================================================

# Entry point: parse CLI args, branch to repair mode (7g), otherwise run the
# eight-step build pipeline in order (§6 Đ43).
main() {
  parse_args "$@"

  log_info "${SCRIPT_NAME} v${VERSION}"
  log_info "trigger_source=${TRIGGER_SOURCE} dry_run=${DRY_RUN} repair=${REPAIR} verbose=${VERBOSE}"

  if [[ $REPAIR -eq 1 ]]; then
    log_info "=== REPAIR MODE (§6 Bước 7g) ==="
    repair_publish
    log_ok "${SCRIPT_NAME} (repair) completed"
    exit 0
  fi

  log_info "=== Bước 1 PRECHECK ==="
  precheck

  log_info "=== Bước 2 TRY-LOCK ==="
  try_lock

  log_info "=== Bước 3 QUERY PG ==="
  query_pg

  log_info "=== Bước 4 SCAN FS ==="
  scan_fs

  log_info "=== Bước 5 GENERATE + 2 CHECKSUM ==="
  generate

  log_info "=== Bước 6 VALIDATE ==="
  validate

  log_info "=== Bước 7 PUBLISH ==="
  publish

  log_info "=== Bước 8 RELEASE ==="
  release

  log_ok "${SCRIPT_NAME} completed (trigger=${TRIGGER_SOURCE} dry_run=${DRY_RUN})"
  exit 0
}

main "$@"