#!/usr/bin/env bash
# dot-context-pack-build.sh rev 11 (Đ43 audit R2: Fix A+B)
# =============================================================================
# dot-context-pack-build — Context Pack Builder (Đ43 "động cơ chính")
# =============================================================================
# @version     0.1-skeleton
# @date        2026-04-17
# @author      Claude CLI (claude-go), S178 Fix 12 Phase 4a P1+P2
# @spec        Điều 43 v1.2 FINAL rev 6 §6 (8 bước) + §5 (schema rev 6 target_db) + §6.X CẤM HARDCODE
# @paired-dot  dot-context-pack-verify.sh (NT12 HP v4.6.2 — sẽ soạn Phase 4b)
# @db-access   directus (KHO): R/W context_pack_*, R dot_tools, normative_registry,
#              birth_registry, meta_catalog, dot_operations, dot_config
#              incomex_metadata (NÃO): R kb_documents (Phase 4a P4 phiên sau)
#              postgres catalog: pg_database (db_count khi whitelist rỗng)
# @gateway     psql TCP localhost:5432 (postgresql-client host-side, thay container
#              exec pattern post-TD-S178-23). Password từ /opt/incomex/secrets/
#              .env.production (Đ33 §14 SSOT, post-TD-S178-14).
#              Role directus (owner directus DB) cho write (PG_PASSWORD_RW)
#              Role context_pack_readonly cho read cross-DB (Phase 1.5, PG_PASSWORD_RO)
# @exit-codes  0 OK / unchanged-skip / coalesced-skip
#              1 ERROR (runtime / validate / publish)
#              2 USAGE
#              3 PRECHECK_FAIL (PG / KB / infra)
#              4 LOCK_COALESCED (reserve, current maps to 0)
# @scope       Phase 4a P1+P2 = skeleton + Bước 1-4. Bước 5-8 Phase 4a P3+P4 phiên sau.
# @bootstrap   Đ33 §13 ngoại lệ — chạy manual trong dev, register dot_tools ở Phase 5.
# =============================================================================
# BƯỚC MAP (§6 Đ43):
#   Bước 1 precheck()  — PG/KB health, git_commit 5-tier, on-deploy gate
#   Bước 2 try_lock()  — pg_try_advisory_lock(43,1), coalesce-skip, INSERT request
#   Bước 3 query_pg()  — law/dot/entity/species/db counts (reference tables + dot_config)
#   Bước 4 scan_fs()   — loop dot_config.context_pack_scan_paths, mtime cache
#   Bước 5 generate()  — render 8 section + 2 checksum (P3 phiên sau)
#   Bước 6 validate()  — min/max size + format + compare-unchanged (P3 phiên sau)
#   Bước 7 publish()   — 7a..7g 2-phase publish + repair (P4 phiên sau)
#   Bước 8 release()   — pg_advisory_unlock, UPDATE request done (P4 phiên sau)
# CẤM HARDCODE (§6.X): section list, folder, DB, pattern, threshold, path, size.
# Mọi giá trị qua dot_config hoặc reference table PG hoặc catalog pg_*.
# =============================================================================
set -euo pipefail

# -----------------------------------------------------------------------------
# Globals (readonly constants — not runtime config)
# -----------------------------------------------------------------------------
readonly VERSION="1.0-rev11-d43r6-audit-r2"
SCRIPT_NAME="$(basename "$0")"
readonly SCRIPT_NAME
# PG env vars (NOT readonly — populated by env_load() from .env.production).
# Required after env_load: PGHOST PGPORT PG_USER_RW PG_DB_MAIN PG_PASSWORD_RW
#                          PG_USER_RO PG_PASSWORD_RO PG_DB_NAO
readonly LOCK_NS_BUILD_CLASSID=43
readonly LOCK_NS_BUILD_OBJID=1
readonly TMPDIR="${TMPDIR:-/tmp}"

# -----------------------------------------------------------------------------
# Runtime state (populated at parse_args + runtime)
# -----------------------------------------------------------------------------
DRY_RUN=0
REPAIR=0
VERBOSE=0
TRIGGER_SOURCE=""
# shellcheck disable=SC2034  # BUILD_ID set Stage B in try_lock, used Stage B/C/P3/P4
BUILD_ID=""
# shellcheck disable=SC2034  # REQUEST_ID set Stage B in try_lock, used Stage C/P4/P5
REQUEST_ID=""
LOCK_HELD=0
# shellcheck disable=SC2034  # OUTPUT_ROOT resolved Stage B from dot_config, used Stage B/C/P3/P4
OUTPUT_ROOT=""
GIT_COMMIT=""
DEDUPE_BUCKET=""
LAW_COUNT=0
DOT_COUNT=0
ENTITY_COUNT=0
SPECIES_COUNT=0
DB_COUNT=0
SCANNED_FILE_COUNT=0
# -----------------------------------------------------------------------------
# Logging — stdout INFO/OK/SKIP/DRY, stderr WARN/ERR/FATAL
# (mangled source had "$" — restored to "$*" so all args are joined into one msg)
# -----------------------------------------------------------------------------
log_info()  { printf '[INFO] %s\n' "$*"; }
log_ok()    { printf '[OK] %s\n' "$*"; }
log_skip()  { printf '[SKIP] %s\n' "$*"; }
log_dry()   { printf '[DRY] %s\n' "$*"; }
log_debug() { if [[ $VERBOSE -eq 1 ]]; then printf '[DEBUG] %s\n' "$*"; fi; }
log_warn()  { printf '[WARN] %s\n' "$*" >&2; }
log_err()   { printf '[ERR] %s\n' "$*" >&2; }
# log_fatal also records the message in LAST_ERR so on_exit can report it.
log_fatal() { LAST_ERR="$*"; printf '[FATAL] %s\n' "$*" >&2; }

# Stub for fn_log_issue (Đ35 v5.1 BLOCK 4 not yet enacted — TD-S178-12 retrofit later).
# Always returns 0: issue reporting is best-effort and must never abort the build.
log_issue() {
  local severity="${1:-warn}"
  local category="${2:-generic}"
  local summary="${3:-}"
  log_warn "log_issue STUB: severity=${severity} category=${category} summary=${summary}"
  return 0
}
# -----------------------------------------------------------------------------
# Cleanup trap — release advisory lock best-effort if still held.
# -----------------------------------------------------------------------------
on_exit() {
  local rc=$?
  # Bước 8 retry path: if the build failed mid-way (REQUEST_ID set, MANIFEST_ID
  # not yet set) → release_failure to UPDATE the request row to retry/failed.
  if [[ $rc -ne 0 \
        && -n "${REQUEST_ID:-}" \
        && "${REQUEST_ID}" != "dry-run" \
        && "${REPAIR:-0}" -ne 1 \
        && -z "${MANIFEST_ID:-}" ]]; then
    log_debug "trap EXIT rc=${rc} → release_failure path"
    release_failure "${LAST_ERR:-pipeline exit ${rc}}" || true
  fi
  if [[ $LOCK_HELD -eq 1 ]]; then
    log_debug "trap EXIT: releasing advisory lock (${LOCK_NS_BUILD_CLASSID},${LOCK_NS_BUILD_OBJID})"
    # Best-effort unlock: defaults cover the case where env_load never ran.
    PGPASSWORD="${PG_PASSWORD_RW:-}" psql -h "${PGHOST:-127.0.0.1}" -p "${PGPORT:-5432}" \
      -U "${PG_USER_RW:-directus}" -d "${PG_DB_MAIN:-directus}" -At \
      -c "SELECT pg_advisory_unlock(${LOCK_NS_BUILD_CLASSID}, ${LOCK_NS_BUILD_OBJID});" \
      >/dev/null 2>&1 || true
    LOCK_HELD=0
  fi
  # Preserve the original exit status of the script.
  return $rc
}
trap on_exit EXIT
on_err() { local line="${1:-?}" log_fatal "Error at line ${line} (exit $?)" exit 1 } trap 'on_err $LINENO' ERR
# -----------------------------------------------------------------------------
# PG helpers — TCP localhost (NT2 post-TD-S178-23: container-exec pattern dropped;
# user incomex does not need docker group). Password from env_load() → .env.production.
# -----------------------------------------------------------------------------
# Run SQL ($1) against the main DB as the read/write role; rows on stdout (-At).
run_pg_rw() {
  PGPASSWORD="$PG_PASSWORD_RW" psql -h "$PGHOST" -p "$PGPORT" \
    -U "$PG_USER_RW" -d "$PG_DB_MAIN" -At -v ON_ERROR_STOP=1 <<< "$1"
}
# Đ43 v1.2 rev 6 §5.7 target_db dispatch: caller passes the db name taken from
# section_definitions. Read-only role, rows on stdout (-At).
run_pg_ro_db() {
  local db="$1"
  local sql="$2"
  PGPASSWORD="$PG_PASSWORD_RO" psql -h "$PGHOST" -p "$PGPORT" \
    -U "$PG_USER_RO" -d "$db" -At -v ON_ERROR_STOP=1 <<< "$sql"
}
table_exists_in_db() { local db="$1" local tbl="$2" local result result="$(run_pg_ro_db "$db" "SELECT 1 FROM information_schema.tables WHERE table_schema='public' AND table_name='${tbl}' LIMIT 1" 2>/dev/null || true)" [[ "$result" == "1" ]] }
# -----------------------------------------------------------------------------
# Config loader — hardcoded fallbacks FORBIDDEN (§6.X P2: no fallback when a
# JSONB key is missing). A missing key is a hard exit 1.
# -----------------------------------------------------------------------------
dot_config_get() {
  local key="$1"
  local value
  value="$(run_pg_rw "SELECT value FROM dot_config WHERE key='${key}'")"
  if [[ -z "$value" ]]; then
    log_fatal "dot_config key missing: ${key} (§6.X P2 CẤM fallback)"
    exit 1
  fi
  # printf without trailing newline so callers can compare/embed the raw value.
  printf '%s' "$value"
}
# -----------------------------------------------------------------------------
# Env loader — Đ33 §14 SSOT /opt/incomex/secrets/.env.production
# (TD-S178-14 RESOLVED: legacy env path fallback removed)
# Exports: PGHOST PGPORT PG_USER_RW PG_DB_MAIN PG_PASSWORD_RW
#          PG_USER_RO PG_PASSWORD_RO PG_DB_NAO
#          AGENT_DATA_URL AGENT_DATA_API_KEY
# Exit 3 (PRECHECK_FAIL) if the file is unreadable or any required key is empty.
# -----------------------------------------------------------------------------
env_load() {
  local env_file="${ENV_FILE:-/opt/incomex/secrets/.env.production}"
  if [[ ! -r "$env_file" ]]; then
    log_fatal "env file not readable (Đ33 §14 SSOT): $env_file"
    exit 3
  fi
  # set -a so every assignment in the env file is exported to child processes (psql/curl).
  set -a
  # shellcheck source=/dev/null
  source "$env_file"
  set +a
  # Fail-fast: required keys for PG + KB (§6.X: no fallback allowed)
  local missing=()
  local k
  for k in PGHOST PGPORT PG_USER_RW PG_DB_MAIN PG_PASSWORD_RW \
           PG_USER_RO PG_PASSWORD_RO PG_DB_NAO \
           AGENT_DATA_URL AGENT_DATA_API_KEY; do
    [[ -z "${!k:-}" ]] && missing+=("$k")
  done
  if [[ ${#missing[@]} -gt 0 ]]; then
    log_fatal "env file missing required keys: ${missing[*]} (file=${env_file})"
    exit 3
  fi
}
# -----------------------------------------------------------------------------
# git_commit 5-tier fallback (§6 Bước 1, kept from v1.1). Sets GIT_COMMIT.
# Never fails: worst case is GIT_COMMIT="unknown" + WARN (tier 5).
# -----------------------------------------------------------------------------
detect_git_commit() {
  # Tier 1: git rev-parse in /opt/incomex/*-repo/
  local repo sha
  for repo in /opt/incomex/*-repo; do
    [[ -d "$repo/.git" ]] || continue
    sha="$(git -C "$repo" rev-parse HEAD 2>/dev/null || true)"
    if [[ -n "$sha" ]]; then
      GIT_COMMIT="$sha"
      log_debug "git_commit tier1 from $repo"
      return 0
    fi
  done
  # Tier 2: /opt/incomex/RELEASE_VERSION
  if [[ -r /opt/incomex/RELEASE_VERSION ]]; then
    GIT_COMMIT="$(tr -d '[:space:]' < /opt/incomex/RELEASE_VERSION)"
    if [[ -n "$GIT_COMMIT" ]]; then
      log_debug "git_commit tier2 from RELEASE_VERSION"
      return 0
    fi
  fi
  # Tier 3: env RELEASE_SHA
  if [[ -n "${RELEASE_SHA:-}" ]]; then
    GIT_COMMIT="$RELEASE_SHA"
    # Fixed: name the source (tiers 1/2/4 do); the old message interpolated the SHA value.
    log_debug "git_commit tier3 from RELEASE_SHA env"
    return 0
  fi
  # Tier 4: vps_deploy_log latest (if the table exists)
  if table_exists_in_db "$PG_DB_MAIN" "vps_deploy_log"; then
    sha="$(run_pg_rw "SELECT git_sha FROM vps_deploy_log ORDER BY deployed_at DESC LIMIT 1" 2>/dev/null || true)"
    if [[ -n "$sha" ]]; then
      GIT_COMMIT="$sha"
      log_debug "git_commit tier4 from vps_deploy_log"
      return 0
    fi
  fi
  # Tier 5: unknown + WARN
  GIT_COMMIT="unknown"
  log_warn "git_commit=unknown (tier 5, all fallbacks exhausted — TD-S178-17 vps_deploy_log missing)"
}
# -----------------------------------------------------------------------------
# Đ41 §6.5 — on-deploy gate (build only proceeds when is_known_good=true).
# No-op for every trigger other than on_deploy. Exits 0 (skip) on a bad deploy.
# -----------------------------------------------------------------------------
on_deploy_gate() {
  [[ "$TRIGGER_SOURCE" != "on_deploy" ]] && return 0
  if ! table_exists_in_db "$PG_DB_MAIN" "vps_deploy_log"; then
    log_warn "on_deploy requested nhưng vps_deploy_log missing (TD-S178-17) — permissive skip gate, tiếp build"
    return 0
  fi
  local known_good
  # Default to 'f' (not known-good) when the query itself fails.
  known_good="$(run_pg_rw "SELECT is_known_good FROM vps_deploy_log ORDER BY deployed_at DESC LIMIT 1" 2>/dev/null || echo 'f')"
  if [[ "$known_good" != "t" ]]; then
    log_skip "on_deploy: latest deploy is_known_good=${known_good} — exit 0 per Đ41 §6.5"
    exit 0
  fi
  log_ok "on_deploy gate PASS (is_known_good=true)"
}
# -----------------------------------------------------------------------------
# §5.1 Đ43 — validate trigger_source against PG (NT2/NT4: the 6 enum values must
# NOT be hardcoded). Adding a new trigger_source = INSERT into
# context_trigger_sources, zero code changes. Exit 2 (USAGE) on invalid value.
# NOTE(review): TRIGGER_SOURCE is interpolated into SQL; a value containing a
# single quote would break the query. CLI-only input today — confirm upstream.
# -----------------------------------------------------------------------------
validate_trigger_source() {
  local hit
  hit="$(run_pg_rw "SELECT code FROM context_trigger_sources WHERE code = '${TRIGGER_SOURCE}'")"
  if [[ "$hit" != "$TRIGGER_SOURCE" ]]; then
    local valid
    valid="$(run_pg_rw "SELECT string_agg(code, '|' ORDER BY code) FROM context_trigger_sources")"
    log_fatal "trigger_source='${TRIGGER_SOURCE}' không có trong context_trigger_sources (hợp lệ: ${valid})"
    exit 2
  fi
}
# -----------------------------------------------------------------------------
# Đ43 §6.X NT2 — Python deps auto-verify + auto-install.
# Driven by dot_config.context_pack_python_deps (JSONB array). Hardcoded pkg
# list forbidden. Missing → pip install --user --break-system-packages <pkg>
# → reimport verify. pip failure or import-after-install failure → exit 3.
# NOTE(review): assumes the JSON entry is both the pip package name and the
# Python module name — a mismatch (e.g. pyyaml/yaml) hits the second exit 3.
# -----------------------------------------------------------------------------
check_python_deps() {
  local deps_json pkg
  deps_json="$(dot_config_get 'context_pack_python_deps')"
  if [[ -z "$deps_json" || "$deps_json" == "null" || "$deps_json" == "[]" ]]; then
    log_info "context_pack_python_deps empty — no Python deps required"
    return 0
  fi
  local missing=()
  while IFS= read -r pkg; do
    [[ -z "$pkg" ]] && continue
    if python3 -c "import ${pkg}" >/dev/null 2>&1; then
      log_ok "python dep present: ${pkg}"
    else
      missing+=("$pkg")
      log_warn "python dep missing: ${pkg} — will auto-install (NT2)"
    fi
  done < <(jq -r '.[]' <<< "$deps_json")
  [[ ${#missing[@]} -eq 0 ]] && return 0
  for pkg in "${missing[@]}"; do
    log_info "pip install --user --break-system-packages ${pkg}"
    if ! pip install --user --break-system-packages "$pkg" >/dev/null 2>&1; then
      log_fatal "pip install failed: ${pkg} (check network / user-site perms / PEP 668)"
      exit 3
    fi
    if ! python3 -c "import ${pkg}" >/dev/null 2>&1; then
      log_fatal "pip install OK but import still fails: ${pkg} (module name mismatch?)"
      exit 3
    fi
    log_ok "auto-installed + import verified: ${pkg}"
  done
}
# -----------------------------------------------------------------------------
# Usage — heredoc expands ${SCRIPT_NAME}; everything else is literal help text.
# -----------------------------------------------------------------------------
usage() {
  cat <<USAGE
Usage: ${SCRIPT_NAME} [OPTIONS]

Đ43 v1.2 rev 6 §6 — Context pack builder (8 bước).

OPTIONS:
  --help, -h               In hướng dẫn này
  --dry-run                Chạy 8 bước KHÔNG ghi PG live / KB / FS live
  --trigger-source=<code>  Nguồn trigger. Validate runtime từ PG
                           context_trigger_sources (§5.1 Đ43, NT2/NT4).
                           Default: on_demand
  --repair                 Mode repair §6 Bước 7g (phát hiện state
                           post_fs_pre_db_finalize → finalize hoặc rollback)
  --build-id=<id>          Force build_id (debug / repair); default auto-generate
  --verbose                Debug log

EXIT CODES:
  0 OK (kể cả coalesced-skip / unchanged-skip)
  1 ERROR
  2 USAGE
  3 PRECHECK_FAIL
USAGE
}
# -----------------------------------------------------------------------------
# Argument parser. Exit 0 on --help, exit 2 on usage errors.
# Fixed (stripped-asterisk mangling): '--trigger-source=*' / '--build-id=*'
# glob patterns and '${1#*=}' value extraction — the collapsed source only
# matched a literal trailing '=' and stripped a literal leading '='.
# -----------------------------------------------------------------------------
# shellcheck disable=SC2034  # BUILD_ID / REQUEST_ID / OUTPUT_ROOT used in Stage B+C
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --help|-h)
        usage
        exit 0 ;;
      --dry-run) DRY_RUN=1; shift ;;
      --repair) REPAIR=1; shift ;;
      --verbose) VERBOSE=1; shift ;;
      --trigger-source=*) TRIGGER_SOURCE="${1#*=}"; shift ;;
      --trigger-source)
        [[ $# -lt 2 ]] && { log_err "--trigger-source requires value"; exit 2; }
        TRIGGER_SOURCE="$2"
        shift 2 ;;
      --build-id=*) BUILD_ID="${1#*=}"; shift ;;
      --build-id)
        [[ $# -lt 2 ]] && { log_err "--build-id requires value"; exit 2; }
        BUILD_ID="$2"
        shift 2 ;;
      *)
        log_err "Unknown option: $1"
        usage >&2
        exit 2 ;;
    esac
  done
  TRIGGER_SOURCE="${TRIGGER_SOURCE:-on_demand}"
  # §5.1 Đ43 enum validation happens at runtime against PG (NT2/NT4),
  # in precheck after env_load (PG env needed) — see validate_trigger_source().
}
# =============================================================================
# 8 BƯỚC §6 — stubs (implemented in order: Stage B = 1+2, Stage C = 3+4, P3+P4 = 5-8)
# =============================================================================

# -----------------------------------------------------------------------------
# Bước 1 §6 — PRECHECK. Exit 3 (PRECHECK_FAIL) on any infra failure.
# -----------------------------------------------------------------------------
precheck() {
  # 1.0 Env SSOT (Đ33 §14 /opt/incomex/secrets/.env.production)
  env_load
  log_ok "env loaded from SSOT (Đ33 §14)"
  # 1.0.1 Minimal PG health so validate_trigger_source can read context_trigger_sources
  if ! run_pg_rw "SELECT 1" >/dev/null 2>&1; then
    log_fatal "PG preflight fail trước validate_trigger_source (${PGHOST}:${PGPORT})"
    exit 3
  fi
  validate_trigger_source
  log_ok "trigger_source=${TRIGGER_SOURCE} valid (context_trigger_sources PG)"
  # 1.1 PG health (directus DB via TCP)
  if ! run_pg_rw "SELECT 1" >/dev/null 2>&1; then
    log_fatal "PG health fail (host=${PGHOST}:${PGPORT}, user=${PG_USER_RW}, db=${PG_DB_MAIN})"
    exit 3
  fi
  log_ok "PG healthy (${PG_USER_RW}@${PGHOST}:${PGPORT}/${PG_DB_MAIN})"
  # 1.2 OUTPUT_ROOT from dot_config (§6.X: no hardcoding)
  OUTPUT_ROOT="$(dot_config_get 'context_pack_output_root')"
  log_info "OUTPUT_ROOT=${OUTPUT_ROOT} (from dot_config)"
  # 1.3 Folder existence check (Phase 2 created the 3 folders)
  local dir
  for dir in "$OUTPUT_ROOT" "${OUTPUT_ROOT}.tmp" "${OUTPUT_ROOT}-staging"; do
    if [[ ! -d "$dir" ]]; then
      log_fatal "required folder missing: ${dir} (rerun dot-dieu43-fs-init.sh Phase 2)"
      exit 3
    fi
  done
  log_ok "output folders exist: ${OUTPUT_ROOT}{,.tmp,-staging}"
  # 1.4 Lock dir writable check (tmp folder used as lock dir proxy) — WARN only
  if [[ ! -w "${OUTPUT_ROOT}.tmp" ]]; then
    log_warn "lock dir not writable as $(whoami): ${OUTPUT_ROOT}.tmp (need docker/sudo for write in Bước 5)"
  else
    log_ok "lock dir writable: ${OUTPUT_ROOT}.tmp"
  fi
  # 1.5 dot_operations has CONTEXT_PACK_BUILD (seeded by Phase 1 migration, §5.6)
  local op_count
  op_count="$(run_pg_rw "SELECT COUNT(*) FROM dot_operations WHERE code='CONTEXT_PACK_BUILD'")"
  if [[ "$op_count" != "1" ]]; then
    log_fatal "dot_operations missing CONTEXT_PACK_BUILD (count=${op_count}, expect 1) — Phase 1 migration incomplete"
    exit 3
  fi
  log_ok "dot_operations.CONTEXT_PACK_BUILD present"
  # 1.6 KB API health (env already loaded at 1.0); '000' stands in for curl failure
  local http_code
  http_code="$(curl -sS -o /dev/null -w '%{http_code}' --max-time 10 \
    -H "X-API-Key: ${AGENT_DATA_API_KEY}" \
    "${AGENT_DATA_URL}/health" 2>/dev/null || echo '000')"
  if [[ "$http_code" != "200" ]]; then
    log_fatal "KB API health fail HTTP=${http_code} (URL=${AGENT_DATA_URL})"
    exit 3
  fi
  log_ok "KB API healthy (${AGENT_DATA_URL})"
  # 1.7 git_commit 5-tier fallback
  detect_git_commit
  log_info "git_commit=${GIT_COMMIT}"
  # 1.8 on-deploy gate Đ41 §6.5
  on_deploy_gate
  # 1.9 Python deps (NT2: dot_config driven, §6.X: no hardcoding)
  check_python_deps
}
# -----------------------------------------------------------------------------
# Bước 2 §6 — TRY-LOCK + coalesce-skip + INSERT request.
# Sets: DEDUPE_BUCKET, LOCK_HELD, BUILD_ID, REQUEST_ID. Exits 0 on coalesce.
# -----------------------------------------------------------------------------
try_lock() {
  # 2.1 Compute dedupe_bucket (hourly, per trigger)
  DEDUPE_BUCKET="$(run_pg_rw "SELECT date_trunc('hour', now())::text")"
  log_debug "dedupe_bucket=${DEDUPE_BUCKET}"
  # 2.2 Try to acquire advisory lock (43, 1) — build namespace per §6 v1.1
  local got
  got="$(run_pg_rw "SELECT pg_try_advisory_lock(${LOCK_NS_BUILD_CLASSID}, ${LOCK_NS_BUILD_OBJID})")"
  if [[ "$got" != "t" ]]; then
    # 2.3a Busy → coalesce skip (§6 Bước 2 v1.1)
    log_skip "advisory_lock(${LOCK_NS_BUILD_CLASSID},${LOCK_NS_BUILD_OBJID}) BUSY — coalesce skip"
    if [[ $DRY_RUN -eq 1 ]]; then
      log_dry "would INSERT request (trigger=${TRIGGER_SOURCE}, bucket=${DEDUPE_BUCKET}, status=skipped, reason=coalesced)"
    else
      # JSON double quotes escaped so the jsonb literal survives the shell string.
      run_pg_rw "INSERT INTO context_pack_requests (trigger_source, dedupe_bucket, status, detail) VALUES ('${TRIGGER_SOURCE}', '${DEDUPE_BUCKET}', 'skipped', '{\"reason\":\"coalesced\"}'::jsonb)" >/dev/null || true
      log_ok "coalesce-skip request inserted"
    fi
    exit 0
  fi
  # 2.3b Lock acquired
  LOCK_HELD=1
  log_ok "advisory_lock(${LOCK_NS_BUILD_CLASSID},${LOCK_NS_BUILD_OBJID}) acquired"
  # 2.4 Generate build_id unless --build-id was given
  if [[ -z "$BUILD_ID" ]]; then
    # Avoid tr|head SIGPIPE (141) under pipefail; use bash $RANDOM instead
    local _rand
    printf -v _rand '%04x%02x' "$RANDOM" "$((RANDOM % 256))"
    BUILD_ID="$(date -u +%Y%m%d-%H%M%S)-${_rand}"
  fi
  log_info "build_id=${BUILD_ID}"
  # 2.5 INSERT request status='running' (or reuse an existing row in retry tests)
  if [[ $DRY_RUN -eq 1 ]]; then
    log_dry "would INSERT request (trigger=${TRIGGER_SOURCE}, bucket=${DEDUPE_BUCKET}, status=running, build_id=${BUILD_ID})"
    REQUEST_ID="dry-run"
  elif [[ -n "${DOT_TEST_REUSE_REQUEST_ID:-}" ]]; then
    # Test hook: reuse an existing request row (simulates retry scheduler pickup)
    REQUEST_ID="$DOT_TEST_REUSE_REQUEST_ID"
    run_pg_rw "UPDATE context_pack_requests SET status='running', started_at=now(), detail = detail || jsonb_build_object('build_id','${BUILD_ID}','retry_attempt', (retry_count+1)) WHERE id=${REQUEST_ID}" >/dev/null
    log_warn "DOT_TEST_REUSE_REQUEST_ID=${REQUEST_ID} → reuse existing row (test hook, skip INSERT)"
  else
    REQUEST_ID="$(run_pg_rw "INSERT INTO context_pack_requests (trigger_source, dedupe_bucket, status, started_at, detail) VALUES ('${TRIGGER_SOURCE}', '${DEDUPE_BUCKET}', 'running', now(), jsonb_build_object('build_id', '${BUILD_ID}')) RETURNING id" | head -1)"
    log_ok "request_id=${REQUEST_ID} inserted (status=running)"
  fi
}
# -----------------------------------------------------------------------------
# Bước 3 §6 — QUERY PG (reference tables + dot_config whitelist + pg_database fallback)
# Sources per prompt §3 Stage C + §6 Đ43 v1.2 rev 6:
#   law_count     ← normative_registry
#   dot_count     ← dot_tools
#   entity_count  ← birth_registry
#   species_count ← meta_catalog
#   db_count      ← dot_config.context_pack_scan_db_whitelist
#                   empty → pg_database catalog excluding (postgres,template0,template1) [NT11]
# Fixed (stripped-asterisk mangling): all COUNT() restored to COUNT(*).
# -----------------------------------------------------------------------------
query_pg() {
  local db_whitelist_json whitelist_len
  db_whitelist_json="$(dot_config_get 'context_pack_scan_db_whitelist')"
  whitelist_len="$(jq 'length' <<< "$db_whitelist_json")"
  if [[ "$whitelist_len" == "0" ]]; then
    # NT11: minimal declaration — pg_database catalog instead of a hardcoded DB count
    DB_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM pg_database WHERE datname NOT IN ('template0','template1','postgres')")"
    log_debug "db_count: pg_database catalog (NT11, whitelist empty) → ${DB_COUNT}"
  else
    DB_COUNT="$whitelist_len"
    log_debug "db_count: context_pack_scan_db_whitelist length → ${DB_COUNT}"
  fi
  # Counts from authoritative reference tables (read-only role cross-DB, Phase 1.5 P10)
  LAW_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM normative_registry")"
  DOT_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM dot_tools")"
  ENTITY_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM birth_registry")"
  SPECIES_COUNT="$(run_pg_ro_db "$PG_DB_MAIN" "SELECT COUNT(*) FROM meta_catalog")"
  log_ok "query_pg: law=${LAW_COUNT} dot=${DOT_COUNT} entity=${ENTITY_COUNT} species=${SPECIES_COUNT} db=${DB_COUNT}"
}
# -----------------------------------------------------------------------------
# Bước 4 §6 — SCAN FS (dot_config.context_pack_scan_paths, mtime cache).
# Cache: TSV per build — path\ttotal_files\tlatest_mtime_epoch
# Missing folders are skipped with a WARN, never fatal.
# -----------------------------------------------------------------------------
scan_fs() {
  local paths_json
  paths_json="$(dot_config_get 'context_pack_scan_paths')"
  local tmp_cache="${TMPDIR}/dcp-scan-${BUILD_ID:-nobuild}.tsv"
  : > "$tmp_cache"
  local total=0 skipped=0
  local path count newest
  while IFS= read -r path; do
    [[ -z "$path" ]] && continue
    if [[ ! -d "$path" ]]; then
      log_warn "scan_fs: folder missing → skip: ${path}"
      skipped=$((skipped + 1))
      continue
    fi
    count="$(find "$path" -maxdepth 3 -type f 2>/dev/null | wc -l)"
    # awk avoids SIGPIPE under pipefail (consumes all input, emits once at END)
    newest="$(find "$path" -maxdepth 3 -type f -printf '%T@\n' 2>/dev/null | awk '$1 > max {max=$1} END {print max+0}')"
    printf '%s\t%s\t%s\n' "$path" "$count" "$newest" >> "$tmp_cache"
    total=$((total + count))
    log_debug "scan_fs: ${path} total=${count} newest_mtime=${newest}"
  done < <(jq -r '.[]' <<< "$paths_json")
  SCANNED_FILE_COUNT="$total"
  log_ok "scan_fs: scanned=${SCANNED_FILE_COUNT} skipped=${skipped} cache=${tmp_cache}"
}
# -----------------------------------------------------------------------------
# §6 Bước 5 rev 5 — strip the volatile header for logical_checksum.
#   markdown/mermaid: sed range-delete from opening to closing delimiter line
#   json:             jq -S 'del(._volatile_header)' + canonical key sort
#   other formats:    pass through unchanged
# Writes the stripped content to stdout; $1 = file, $2 = format.
# -----------------------------------------------------------------------------
strip_volatile_header() {
  local file="$1"
  local fmt="$2"
  case "$fmt" in
    markdown|mermaid)
      sed '/<!-- VOLATILE HEADER -->/,/<!-- \/VOLATILE HEADER -->/d' "$file" ;;
    json)
      jq -S 'del(._volatile_header)' "$file" ;;
    *)
      cat "$file" ;;
  esac
}
# -----------------------------------------------------------------------------
# Bước 5 §6 Đ43 rev 6 — GENERATE (generic dispatcher)
# -----------------------------------------------------------------------------
# NT2/NT4/rev 4 compliance: no per-section case-dispatch, no hardcoded template;
# every section renders via /opt/incomex/dot/lib/cp-render-section.py.
# Test filter: env var ONLY_SECTION='code' to run a single section.
# Output: ${OUTPUT_ROOT}.tmp/${BUILD_ID}/<output_filename> (ephemeral staging,
# promoted live at Bước 7).
# -----------------------------------------------------------------------------
generate() {
  if [[ -z "${BUILD_ID:-}" ]]; then
    log_fatal "generate: BUILD_ID unset (Bước 2 try_lock phải chạy trước)"
    exit 1
  fi
  local generated_at
  generated_at="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  local out_dir="${OUTPUT_ROOT}.tmp/${BUILD_ID}"
  mkdir -p "$out_dir"
  log_info "generate out_dir=${out_dir}"
  local where=""
  if [[ -n "${ONLY_SECTION:-}" ]]; then
    where=" AND code = '${ONLY_SECTION}'"
    log_info "generate ONLY_SECTION=${ONLY_SECTION} (single-section filter)"
  fi
  # Pull all active section definitions as one JSON array (order_index order).
  local sections
  sections="$(run_pg_rw " SELECT COALESCE(json_agg(json_build_object( 'code', code, 'format', format, 'data_source', data_source, 'target_db', COALESCE(target_db, ''), 'template_kb_path', COALESCE(template_kb_path, ''), 'query_kb_path', COALESCE(query_kb_path, ''), 'render_config', render_config::text, 'output_filename', output_filename, 'min_size_bytes', min_size_bytes, 'max_size_bytes', COALESCE(max_size_bytes, 2147483647) ) ORDER BY order_index), '[]'::json) FROM context_pack_section_definitions WHERE is_active = true${where}")"
  local total
  total="$(jq 'length' <<< "$sections")"
  if [[ "$total" -eq 0 ]]; then
    log_fatal "generate: 0 active section khớp filter (ONLY_SECTION=${ONLY_SECTION:-<none>})"
    exit 1
  fi
  log_info "generate: ${total} active section(s)"
  local render_log="/tmp/dcp-render-${BUILD_ID}.log"
  : > "$render_log"
  local cs_tsv="/tmp/dcp-cs-${BUILD_ID}.tsv"
  : > "$cs_tsv"
  local i=0 ok=0 warn=0 fail=0 skip=0
  while [[ $i -lt $total ]]; do
    local sec code fmt ds tdb tpl qry rc outname minb maxb size outfile
    sec="$(jq -c ".[$i]" <<< "$sections")"
    code="$(jq -r '.code' <<< "$sec")"
    fmt="$(jq -r '.format' <<< "$sec")"
    ds="$(jq -r '.data_source' <<< "$sec")"
    tdb="$(jq -r '.target_db' <<< "$sec")"
    tpl="$(jq -r '.template_kb_path' <<< "$sec")"
    qry="$(jq -r '.query_kb_path' <<< "$sec")"
    rc="$(jq -r '.render_config' <<< "$sec")"
    outname="$(jq -r '.output_filename' <<< "$sec")"
    minb="$(jq -r '.min_size_bytes' <<< "$sec")"
    maxb="$(jq -r '.max_size_bytes' <<< "$sec")"
    outfile="${out_dir}/${outname}"
    log_info "generate[${code}] → ${outname}"
    # Temporarily disable set -e while calling the helper so per-section failure
    # can be captured instead of aborting the whole build.
    set +e
    size="$(python3 /opt/incomex/dot/lib/cp-render-section.py \
      --code="$code" \
      --format="$fmt" \
      --data-source="$ds" \
      --target-db="$tdb" \
      --template-path="$tpl" \
      --query-path="$qry" \
      --render-config="$rc" \
      --output-file="$outfile" \
      --generated-at="$generated_at" \
      --build-id="$BUILD_ID" \
      --git-commit="$GIT_COMMIT" \
      --trigger-source="$TRIGGER_SOURCE" \
      2>>"$render_log")"
    local rc_code=$?
    set -e
    if [[ $rc_code -eq 77 ]]; then
      # Exit 77 is the renderer's "data_source not supported yet" sentinel.
      log_warn "generate[${code}] SKIP data_source chưa support (exit 77)"
      skip=$((skip + 1))
    elif [[ $rc_code -ne 0 ]]; then
      log_err "generate[${code}] FAIL rc=${rc_code} (log: ${render_log})"
      fail=$((fail + 1))
    else
      # §6 Bước 5 — 2 checksums + line_count per section
      local logical_cs file_cs line_count
      logical_cs="$(strip_volatile_header "$outfile" "$fmt" | sha256sum | awk '{print $1}')"
      file_cs="$(sha256sum "$outfile" | awk '{print $1}')"
      line_count="$(wc -l < "$outfile")"
      printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' "$code" "$outname" "$fmt" "$size" "$line_count" "$logical_cs" "$file_cs" >> "$cs_tsv"
      # Size bounds are soft here (WARN + still counted ok); hard gate is Bước 6.
      if [[ "$size" -lt "$minb" ]]; then
        log_warn "generate[${code}] size=${size} < min_size_bytes=${minb} logical=${logical_cs:0:12}"
        warn=$((warn + 1))
        ok=$((ok + 1))
      elif [[ "$size" -gt "$maxb" ]]; then
        log_warn "generate[${code}] size=${size} > max_size_bytes=${maxb} logical=${logical_cs:0:12}"
        warn=$((warn + 1))
        ok=$((ok + 1))
      else
        log_ok "generate[${code}] size=${size} logical=${logical_cs:0:12} file=${file_cs:0:12}"
        ok=$((ok + 1))
      fi
    fi
    i=$((i + 1))
  done
  # Finalize checksum manifest (§5.4 manifest prep, persisted at Bước 7).
  # Enriched with trigger_source, git_commit and counts so 7g repair replay
  # does not need to re-query.
  local manifest="${out_dir}/.checksums.json"
  jq -sR \
    --arg bid "$BUILD_ID" \
    --arg gat "$generated_at" \
    --arg ts "$TRIGGER_SOURCE" \
    --arg gc "$GIT_COMMIT" \
    --argjson law "${LAW_COUNT:-0}" \
    --argjson dot "${DOT_COUNT:-0}" \
    --argjson ent "${ENTITY_COUNT:-0}" \
    --argjson sp "${SPECIES_COUNT:-0}" \
    --argjson db "${DB_COUNT:-0}" '
      split("\n") | map(select(length>0)) | map(split("\t")) |
      {build_id: $bid, generated_at: $gat, trigger_source: $ts, git_commit: $gc,
       counts: {law: $law, dot: $dot, entity: $ent, species: $sp, db: $db},
       sections: map({code:.[0], output_filename:.[1], format:.[2],
                      size:(.[3]|tonumber), line_count:(.[4]|tonumber),
                      logical_sha256:.[5], file_sha256:.[6]})}
    ' "$cs_tsv" > "$manifest"
  log_info "checksum manifest: ${manifest} ($(jq '.sections|length' "$manifest") sections)"
  log_info "generate summary: ok=${ok} warn=${warn} skip=${skip} fail=${fail} total=${total}"
  if [[ $fail -gt 0 ]]; then
    log_fatal "generate: ${fail}/${total} section failed — xem ${render_log}"
    exit 1
  fi
  log_ok "generate DONE out=${out_dir}"
}
# -----------------------------------------------------------------------------
# Bước 6 §6 Đ43 — VALIDATE (size + format check, per section)
# -----------------------------------------------------------------------------
# Size:   file_size ∈ [min_size_bytes, max_size_bytes] from section_definitions
# Format: markdown/mermaid → volatile header block complete (open + 4 keys + close)
#         json            → jq parse OK + _volatile_header key present
#         mermaid syntax  → mmdc binary when available (else skipped — soft check)
# Exit 1 if fail > 0. WARN does not block.
# -----------------------------------------------------------------------------
validate() {
  local out_dir="${OUTPUT_ROOT}.tmp/${BUILD_ID}"
  if [[ ! -d "$out_dir" ]]; then
    log_fatal "validate: out_dir không tồn tại: ${out_dir}"
    exit 1
  fi
  local sections
  sections="$(run_pg_rw " SELECT COALESCE(json_agg(json_build_object( 'code', code, 'format', format, 'output_filename', output_filename, 'min_size_bytes', min_size_bytes, 'max_size_bytes', COALESCE(max_size_bytes, 2147483647) ) ORDER BY order_index), '[]'::json) FROM context_pack_section_definitions WHERE is_active = true")"
  local total
  total="$(jq 'length' <<< "$sections")"
  log_info "validate: ${total} active section(s)"
  # mermaid syntax check is soft: only done when the mmdc binary is installed
  local has_mmdc=0
  command -v mmdc >/dev/null 2>&1 && has_mmdc=1
  local i=0 ok=0 warn=0 fail=0
  while [[ $i -lt $total ]]; do
    local sec code fmt outname minb maxb outfile size
    sec="$(jq -c ".[$i]" <<< "$sections")"
    code="$(jq -r '.code' <<< "$sec")"
    fmt="$(jq -r '.format' <<< "$sec")"
    outname="$(jq -r '.output_filename' <<< "$sec")"
    minb="$(jq -r '.min_size_bytes' <<< "$sec")"
    maxb="$(jq -r '.max_size_bytes' <<< "$sec")"
    outfile="${out_dir}/${outname}"
    # V1: file existence
    if [[ ! -f "$outfile" ]]; then
      log_err "validate[${code}] file missing: ${outfile}"
      fail=$((fail + 1))
      i=$((i + 1))
      continue
    fi
    size="$(wc -c < "$outfile")"
    # V2: size bounds (WARN, non-blocking)
    local size_status="OK"
    if [[ "$size" -lt "$minb" ]]; then
      log_warn "validate[${code}] size=${size} < min=${minb}"
      size_status="UNDER"
      warn=$((warn + 1))
    elif [[ "$size" -gt "$maxb" ]]; then
      log_warn "validate[${code}] size=${size} > max=${maxb}"
      size_status="OVER"
      warn=$((warn + 1))
    fi
    # V3: format check (blocking on FAIL)
    local fmt_status="OK"
    case "$fmt" in
      markdown|mermaid)
        if ! grep -q '^<!-- VOLATILE HEADER -->$' "$outfile" \
           || ! grep -q '^<!-- /VOLATILE HEADER -->$' "$outfile"; then
          log_err "validate[${code}] missing volatile header delimiters (md/mermaid)"
          fmt_status="FAIL"
          fail=$((fail + 1))
        else
          # Đ43 §6 Bước 5 rev 5 — 4 mandatory common runtime fields.
          # Đ43 audit R2 Fix B — grep ONLY inside the header block (between the
          # two delimiter lines), avoiding a false pass when the body happens to
          # contain a key with the same name.
          local vh_block
          vh_block="$(sed -n '/^<!-- VOLATILE HEADER -->$/,/^<!-- \/VOLATILE HEADER -->$/p' "$outfile")"
          local missing_keys=""
          local vh_key
          for vh_key in generated_at build_id git_commit trigger_source; do
            grep -qE "^${vh_key}:[[:space:]]+[^[:space:]]" <<<"$vh_block" \
              || missing_keys="${missing_keys} ${vh_key}"
          done
          if [[ -n "$missing_keys" ]]; then
            log_err "validate[${code}] volatile header thiếu 4-field bắt buộc (trong header block):${missing_keys}"
            fmt_status="FAIL"
            fail=$((fail + 1))
          fi
        fi
        # mermaid extra: try a real parse when mmdc is available
        if [[ "$fmt" == "mermaid" && $has_mmdc -eq 1 ]]; then
          if ! mmdc -i "$outfile" -o "/tmp/dcp-mmd-probe-${BUILD_ID}.svg" >/dev/null 2>&1; then
            log_err "validate[${code}] mmdc parse fail"
            fmt_status="FAIL"
            fail=$((fail + 1))
          fi
          rm -f "/tmp/dcp-mmd-probe-${BUILD_ID}.svg"
        fi
        ;;
      json)
        if ! jq empty "$outfile" >/dev/null 2>&1; then
          log_err "validate[${code}] jq parse fail"
          fmt_status="FAIL"
          fail=$((fail + 1))
        elif ! jq -e 'has("_volatile_header")' "$outfile" >/dev/null 2>&1; then
          log_err "validate[${code}] _volatile_header key missing"
          fmt_status="FAIL"
          fail=$((fail + 1))
        elif ! jq -e '._volatile_header | has("generated_at") and has("build_id") and has("git_commit") and has("trigger_source")' "$outfile" >/dev/null 2>&1; then
          log_err "validate[${code}] _volatile_header thiếu 4-field bắt buộc (generated_at/build_id/git_commit/trigger_source)"
          fmt_status="FAIL"
          fail=$((fail + 1))
        fi
        ;;
      *)
        log_warn "validate[${code}] unknown format=${fmt}"
        fmt_status="UNKNOWN"
        ;;
    esac
    # PASS is only logged/counted when both size and format are clean.
    if [[ "$size_status" == "OK" && "$fmt_status" == "OK" ]]; then
      log_ok "validate[${code}] size=${size} (${minb}≤x≤${maxb}) format=${fmt} PASS"
      ok=$((ok + 1))
    fi
    i=$((i + 1))
  done
  log_info "validate summary: ok=${ok} warn=${warn} fail=${fail} total=${total}"
  if [[ $fail -gt 0 ]]; then
    log_fatal "validate: ${fail}/${total} section failed"
    exit 1
  fi
  log_ok "validate DONE"
}
# =============================================================================
# Bước 7 §6 Đ43 — 2-PHASE PUBLISH (7a-7g)
# Sub-step map:
#   7a COPY build_dir → staging_dir
#   7b VALIDATE staging file_sha256 match manifest (no drift)
#   7c SWAP SYMLINK OUTPUT_ROOT/current → staging/BUILD_ID (atomic via mv -T)
#   7d VERIFY readlink matches expected staging path
#   7e KB MIRROR upload 8 section lên Agent Data API path context-pack/
#   7f PG INSERT context_pack_manifest + 8 context_pack_sections (1 TX Đ35 §5.1)
#   7g REPAIR detect post_fs_pre_db state → finalize hoặc rollback
# Pattern symlink: OUTPUT_ROOT là dir owned by incomex → OUTPUT_ROOT/current symlink
#   (tránh cần write trên /opt/incomex/ parent root-owned).
# Atomic rename: mv -Tf trên cùng FS.
# =============================================================================
publish() { publish_fs # 7a-7d
DEBUG HOOK (test-only): exit after 7d để simulate crash pre-KB/DB
if [[ "${DOT_TEST_STOP_AFTER:-}" == "7d" ]]; then log_warn "DOT_TEST_STOP_AFTER=7d → exit after 7d (test hook, repair mode expected next)" exit 0 fi publish_kb # 7e publish_db # 7f (sets MANIFEST_ID global) }
# -----------------------------------------------------------------------------
# Bước 7a-7d — FS-only publish (staging → live symlink)
# -----------------------------------------------------------------------------
publish_fs() { local src_dir="${OUTPUT_ROOT}.tmp/${BUILD_ID}" local staging_dir="${OUTPUT_ROOT}-staging/${BUILD_ID}" local live_link="${OUTPUT_ROOT}/current"
if [[ ! -d "$src_dir" ]]; then log_fatal "publish 7a: build dir missing: ${src_dir}" exit 1 fi
---------------------------------------------------------------------------
7a COPY build → staging
---------------------------------------------------------------------------
log_info "7a COPY ${src_dir}/ → ${staging_dir}/" if [[ $DRY_RUN -eq 1 ]]; then log_dry "rsync -a ${src_dir}/ ${staging_dir}/ (skip FS write)" else mkdir -p "$staging_dir" rsync -a "${src_dir}/" "${staging_dir}/" local n_copied n_copied="$(find "$staging_dir" -maxdepth 1 -type f 2>/dev/null | wc -l)" log_ok "7a COPY done (${n_copied} files in staging)" fi
---------------------------------------------------------------------------
7b VALIDATE staging matches manifest (no drift)
---------------------------------------------------------------------------
log_info "7b VALIDATE staging checksums" if [[ $DRY_RUN -eq 1 ]]; then log_dry "would recompute sha256 cho từng staging file, so với .checksums.json" else local manifest="${staging_dir}/.checksums.json" if [[ ! -f "$manifest" ]]; then log_fatal "7b manifest missing: ${manifest}" exit 1 fi local mismatches=0 total=0 local section_json code outname expected actual while IFS= read -r section_json; do total=$((total + 1)) code="$(jq -r '.code' <<< "$section_json")" outname="$(jq -r '.output_filename' <<< "$section_json")" expected="$(jq -r '.file_sha256' <<< "$section_json")" if [[ ! -f "${staging_dir}/${outname}" ]]; then log_err "7b staging missing file: ${outname}" mismatches=$((mismatches + 1)) continue fi actual="$(sha256sum "${staging_dir}/${outname}" | awk '{print $1}')" if [[ "$actual" != "$expected" ]]; then log_err "7b drift ${code}: manifest=${expected:0:12} staging=${actual:0:12}" mismatches=$((mismatches + 1)) fi done < <(jq -c '.sections[]' "$manifest") if [[ $mismatches -gt 0 ]]; then log_fatal "7b staging drift: ${mismatches}/${total} file(s) mismatch (§5.4 file_checksum)" exit 1 fi log_ok "7b staging verified: ${total}/${total} file_sha256 match manifest" fi
---------------------------------------------------------------------------
7c SWAP live symlink (atomic mv -T trên cùng FS)
---------------------------------------------------------------------------
log_info "7c SWAP ${live_link} → ${staging_dir}" if [[ $DRY_RUN -eq 1 ]]; then log_dry "ln -sfn ${staging_dir} ${live_link}.new && mv -Tf ${live_link}.new ${live_link}" else if [[ ! -d "${OUTPUT_ROOT}" ]]; then log_fatal "7c OUTPUT_ROOT missing: ${OUTPUT_ROOT}" exit 1 fi local tmp_link="${live_link}.new.$$" ln -sfn "$staging_dir" "$tmp_link" mv -Tf "$tmp_link" "$live_link" log_ok "7c symlink swapped (atomic rename via mv -T)" fi
---------------------------------------------------------------------------
7d VERIFY symlink target
---------------------------------------------------------------------------
log_info "7d VERIFY symlink target" if [[ $DRY_RUN -eq 1 ]]; then log_dry "would readlink ${live_link} và compare to ${staging_dir}" else if [[ ! -L "$live_link" ]]; then log_fatal "7d ${live_link} không phải symlink" exit 1 fi local actual_target actual_target="$(readlink "$live_link")" if [[ "$actual_target" != "$staging_dir" ]]; then log_fatal "7d symlink wrong target: expected=${staging_dir} actual=${actual_target}" exit 1 fi local probe="${live_link}/PROJECT_MAP.md" if [[ ! -r "$probe" ]]; then log_warn "7d probe file unreadable qua symlink: ${probe}" fi log_ok "7d symlink verified: ${live_link} -> ${actual_target}" fi }
# -----------------------------------------------------------------------------
# Bước 7e — KB MIRROR: upload 8 section lên Agent Data API
# Path: context-pack/<BUILD_ID>/<output_filename>. Storage key: context-pack__BID__name
# -----------------------------------------------------------------------------
publish_kb() { local staging_dir="${OUTPUT_ROOT}-staging/${BUILD_ID}" local manifest_file="${staging_dir}/.checksums.json"
log_info "7e KB MIRROR upload to ${AGENT_DATA_URL}/documents" if [[ $DRY_RUN -eq 1 ]]; then log_dry "would POST 8 section → context-pack/${BUILD_ID}/*" KB_MIRROR_OK=1 return 0 fi if [[ ! -f "$manifest_file" ]]; then log_fatal "7e manifest file missing: ${manifest_file}" exit 1 fi
local ok=0 fail=0
local section_json code outname fmt body mime doc_id resp http_code
while IFS= read -r section_json; do
code="$(jq -r '.code' <<< "$section_json")"
outname="$(jq -r '.output_filename' <<< "$section_json")"
fmt="$(jq -r '.format' <<< "$section_json")"
# Mime theo format (chuẩn hoá output, không suy từ extension để tránh drift)
case "$fmt" in
markdown) mime="text/markdown" ;;
json) mime="application/json" ;;
mermaid) mime="text/plain" ;;
*) mime="text/plain" ;;
esac
doc_id="context-pack/${BUILD_ID}/${outname}"
body="$(cat "${staging_dir}/${outname}")"
# Build JSON payload via jq (escape-safe)
local payload
payload="$(jq -n
--arg did "$doc_id"
--arg mime "$mime"
--arg body "$body"
--arg title "Đ43 context-pack ${BUILD_ID} section=${code}"
--arg src "dieu43_context_pack_publish"
--arg bid "$BUILD_ID"
'{document_id:$did, parent_id:"root",
content:{mime_type:$mime, body:$body},
metadata:{title:$title, tags:["dieu43","context-pack","build"], source:$src,
build_id:$bid}}')"
resp="$(curl -sS -o /tmp/dcp-kb-${BUILD_ID}-${code}.resp -w '%{http_code}'
--max-time 30
-X POST
-H "Content-Type: application/json"
-H "X-API-Key: ${AGENT_DATA_API_KEY}"
--data-binary @-
"${AGENT_DATA_URL}/documents?upsert=true" <<< "$payload" 2>/dev/null)"
http_code="$resp"
if [[ "$http_code" == "200" || "$http_code" == "201" ]]; then
log_ok "7e uploaded ${code} → ${doc_id} HTTP=${http_code}"
ok=$((ok + 1))
rm -f /tmp/dcp-kb-${BUILD_ID}-${code}.resp
else
log_err "7e upload FAIL ${code} → ${doc_id} HTTP=${http_code} body=$(head -c 200 /tmp/dcp-kb-${BUILD_ID}-${code}.resp 2>/dev/null)"
fail=$((fail + 1))
fi
done < <(jq -c '.sections[]' "$manifest_file")
log_info "7e KB mirror summary: ok=${ok} fail=${fail}" if [[ $fail -gt 0 ]]; then KB_MIRROR_OK=0 log_fatal "7e KB mirror: ${fail} section fail" exit 1 fi KB_MIRROR_OK=1 log_ok "7e KB mirror DONE (${ok}/8 sections live)" }
# -----------------------------------------------------------------------------
# Bước 7f — PG INSERT context_pack_manifest + 8 context_pack_sections (1 TX)
# Đ35 §5.1 CẤM partial: manifest + 8 sections trong cùng transaction.
# Sets MANIFEST_ID global cho release().
# -----------------------------------------------------------------------------
publish_db() { local staging_dir="${OUTPUT_ROOT}-staging/${BUILD_ID}" local manifest_file="${staging_dir}/.checksums.json"
log_info "7f PG INSERT manifest + sections (1 TX)" if [[ $DRY_RUN -eq 1 ]]; then log_dry "would INSERT context_pack_manifest + 8 context_pack_sections + capture manifest_id" MANIFEST_ID="dry-run" return 0 fi if [[ ! -f "$manifest_file" ]]; then log_fatal "7f manifest file missing: ${manifest_file}" exit 1 fi
Aggregate checksums (sort by order_index in section_definitions)
Use section code ordering as proxy since manifest JSON preserves order from section_definitions ORDER BY order_index
local agg_logical agg_file total_size sec_count agg_logical="$(jq -r '.sections | map(.logical_sha256) | join("")' "$manifest_file" | sha256sum | awk '{print $1}')" agg_file="$(jq -r '.sections | map(.file_sha256) | join("")' "$manifest_file" | sha256sum | awk '{print $1}')" total_size="$(jq '[.sections[].size] | add' "$manifest_file")" sec_count="$(jq '.sections | length' "$manifest_file")"
local generated_at git_commit trigger_source local law_count dot_count entity_count species_count db_count generated_at="$(jq -r '.generated_at' "$manifest_file")" git_commit="$(jq -r '.git_commit' "$manifest_file")" trigger_source="$(jq -r '.trigger_source' "$manifest_file")" law_count="$(jq -r '.counts.law' "$manifest_file")" dot_count="$(jq -r '.counts.dot' "$manifest_file")" entity_count="$(jq -r '.counts.entity' "$manifest_file")" species_count="$(jq -r '.counts.species' "$manifest_file")" db_count="$(jq -r '.counts.db' "$manifest_file")"
local kb_status="live" [[ "${KB_MIRROR_OK:-0}" -ne 1 ]] && kb_status="failed" local publish_status="live" # 7c-7d đã PASS nếu đến đây local health_status="healthy" # validate PASS nếu đến đây
Build VALUES tuples cho 8 sections (s(section_code, file_path, kb_document_path, size, line_count, logical, file_checksum))
local sections_values
sections_values="$(jq -r
--arg sdir "$staging_dir"
--arg bid "$BUILD_ID" '
.sections |
map(
"(" +
(.code | @json | gsub("""; "\u0027")) + ", " +
(($sdir + "/" + .output_filename) | @json | gsub("""; "\u0027")) + ", " +
(("context-pack__" + $bid + "__" + .output_filename) | @json | gsub("""; "\u0027")) + ", " +
(.size | tostring) + ", " +
(.line_count | tostring) + ", " +
(.logical_sha256 | @json | gsub("""; "\u0027")) + ", " +
(.file_sha256 | @json | gsub("""; "\u0027")) + ")"
) | join(",\n ")
' "$manifest_file")"
Escape single quotes trong git_commit (unlikely nhưng safe)
local git_commit_sql="${git_commit//'/''}"
CTE-based single statement (implicit TX), RETURNING manifest_id
local sql sql="$(cat <<SQL WITH new_manifest AS ( INSERT INTO context_pack_manifest( generated_at, published_at, git_commit, trigger_source, law_count, dot_count, entity_count, species_count, db_count, total_size_bytes, section_count, logical_checksum_sha256, file_checksum_sha256, publish_status, kb_mirror_status, publish_step, health_status, _dot_origin ) VALUES ( '${generated_at}', now(), '${git_commit_sql}', '${trigger_source}', ${law_count}, ${dot_count}, ${entity_count}, ${species_count}, ${db_count}, ${total_size}, ${sec_count}, '${agg_logical}', '${agg_file}', '${publish_status}', '${kb_status}', 'done', '${health_status}', 'dieu43_v1_2_rev6_publish' ) RETURNING id ), inserted_sections AS ( INSERT INTO context_pack_sections( manifest_id, section_code, file_path, kb_document_path, size_bytes, line_count, logical_checksum_sha256, file_checksum_sha256 ) SELECT nm.id, s.section_code, s.file_path, s.kb_document_path, s.size_bytes, s.line_count, s.logical_checksum_sha256, s.file_checksum_sha256 FROM new_manifest nm CROSS JOIN (VALUES ${sections_values} ) AS s(section_code, file_path, kb_document_path, size_bytes, line_count, logical_checksum_sha256, file_checksum_sha256) RETURNING id ) SELECT id FROM new_manifest; SQL )"
local err_log="/tmp/dcp-7f-${BUILD_ID}.err"
local out rc
out="$(PGPASSWORD="$PG_PASSWORD_RW" LC_ALL=C.UTF-8 LANG=C.UTF-8
psql -h "$PGHOST" -p "$PGPORT"
-U "$PG_USER_RW" -d "$PG_DB_MAIN" -tAXq -v ON_ERROR_STOP=1
-c "$sql" 2>"$err_log")"
rc=$?
if [[ $rc -ne 0 ]]; then
log_fatal "7f psql failed (rc=${rc}): $(cat "$err_log" | tail -5)"
exit 1
fi
MANIFEST_ID="$(echo "$out" | tr -d '[:space:]')"
if ! [[ "$MANIFEST_ID" =~ ^[0-9]+$ ]]; then
log_fatal "7f unexpected psql output stdout='${out}' stderr='$(cat "$err_log")'"
exit 1
fi
rm -f "$err_log"
log_ok "7f INSERT DONE manifest_id=${MANIFEST_ID} sections=${sec_count} (1 TX via CTE)"
}
# -----------------------------------------------------------------------------
# Bước 8 §6 Đ43 — RELEASE (minimal, retry logic ở P4c)
# UPDATE context_pack_requests status='done', manifest_id, finished_at.
# -----------------------------------------------------------------------------
release() { if [[ $DRY_RUN -eq 1 ]]; then log_dry "would UPDATE request status=done, manifest_id=${MANIFEST_ID:-?}, finished_at=now()" return 0 fi if [[ -z "${REQUEST_ID:-}" || "$REQUEST_ID" == "dry-run" ]]; then log_warn "release: REQUEST_ID unset — skip UPDATE" return 0 fi if [[ -z "${MANIFEST_ID:-}" || "$MANIFEST_ID" == "dry-run" ]]; then log_fatal "release: MANIFEST_ID unset — Bước 7f failed?" exit 1 fi run_pg_rw "UPDATE context_pack_requests SET status='done', manifest_id=${MANIFEST_ID}, finished_at=now(), last_error=NULL WHERE id=${REQUEST_ID}" >/dev/null log_ok "release DONE request_id=${REQUEST_ID} manifest_id=${MANIFEST_ID} status=done" }
# -----------------------------------------------------------------------------
# Bước 8 §6 Đ43 — RELEASE FAILURE path (retry hoặc failed)
# Driven by dot_config.context_pack_retry_policy = {max_retries, backoff_seconds[]}.
# Được gọi từ on_exit trap khi build fail giữa chừng (chưa có MANIFEST_ID).
# -----------------------------------------------------------------------------
release_failure() { local err_msg="${1:-unknown}" if [[ "${DRY_RUN:-0}" -eq 1 ]]; then log_dry "would release_failure err='${err_msg}'" return 0 fi if [[ -z "${REQUEST_ID:-}" || "$REQUEST_ID" == "dry-run" ]]; then return 0 fi
local policy max_retries current_retry new_retry policy="$(dot_config_get 'context_pack_retry_policy')" max_retries="$(jq -r '.max_retries' <<< "$policy")"
current_retry="$(run_pg_rw "SELECT retry_count FROM context_pack_requests WHERE id=${REQUEST_ID}")" [[ -z "$current_retry" ]] && current_retry=0 new_retry=$((current_retry + 1))
Escape single quotes trong error message cho SQL
local err_sql="${err_msg//'/''}"
Truncate long errors
[[ ${#err_sql} -gt 500 ]] && err_sql="${err_sql:0:500}..."
local new_status next_at_sql
§5.9 seed: max_retries=3 + backoff_seconds 3 entries → 3 retries total.
new_retry là attempt tiếp theo (1..max_retries = retry slot, >max → failed permanent)
if [[ $new_retry -le $max_retries ]]; then local idx backoff_s # backoff_seconds[new_retry - 1] (1st retry = index 0, 2nd = index 1, 3rd = index 2) idx=$((new_retry - 1)) backoff_s="$(jq -r ".backoff_seconds[${idx}] // .backoff_seconds[-1] // 300" <<< "$policy")" next_at_sql="now() + interval '${backoff_s} seconds'" new_status="pending" log_warn "release_failure: retry ${new_retry}/${max_retries} scheduled +${backoff_s}s — err='${err_msg}'" else next_at_sql="NULL" new_status="failed" log_err "release_failure: max_retries hit (${new_retry}>=${max_retries}) → status=failed — err='${err_msg}'" log_issue "CRITICAL" "context_pack_build_max_retries" "Build failed after ${new_retry} attempts: ${err_msg}" fi
run_pg_rw "UPDATE context_pack_requests SET status='${new_status}', retry_count=${new_retry}, next_retry_at=${next_at_sql}, last_error='${err_sql}', finished_at=now() WHERE id=${REQUEST_ID}" >/dev/null log_ok "release_failure DONE request_id=${REQUEST_ID} status=${new_status} retry_count=${new_retry}" }
# -----------------------------------------------------------------------------
# Bước 7g — REPAIR mode §6 Đ43
# Detect state qua current symlink + PG request.manifest_id:
#   (A) symlink + request running + manifest_id=NULL → state post_fs_pre_db → FINALIZE
#   (B) symlink + request done + manifest_id set → nothing to do
#   (C) request failed → ROLLBACK (Phase 5)
# -----------------------------------------------------------------------------
repair_publish() {
Đ43 audit R2 Fix A — repair KHÔNG bypass verify infra.
precheck() tự env_load + PG health + OUTPUT_ROOT từ dot_config + folder/operation check.
precheck log_ok "repair: precheck PASS (env + PG + OUTPUT_ROOT + folders + dot_operations)"
local live_link="${OUTPUT_ROOT}/current" if [[ ! -L "$live_link" ]]; then log_fatal "repair: ${live_link} không phải symlink — không có state để repair" exit 1 fi local target build_id_live target="$(readlink "$live_link")" build_id_live="$(basename "$target")" log_info "repair: symlink → ${target} (build_id=${build_id_live})"
if [[ ! -d "$target" ]]; then log_fatal "repair: dangling symlink, target missing: ${target}" exit 1 fi local manifest_file="${target}/.checksums.json" if [[ ! -f "$manifest_file" ]]; then log_fatal "repair: không có .checksums.json trong target" exit 1 fi
Đ43 audit R2 Fix A — manifest integrity gate: file size > 0 + JSON parse OK.
Fail → log FATAL + exit, KHÔNG gọi release() với manifest corrupt.
if [[ ! -s "$manifest_file" ]]; then log_fatal "repair: manifest integrity FAIL — ${manifest_file} empty (size=0)" exit 1 fi if ! jq empty "$manifest_file" >/dev/null 2>&1; then log_fatal "repair: manifest integrity FAIL — ${manifest_file} not valid JSON (jq parse fail)" exit 1 fi log_ok "repair: manifest integrity OK (size>0 + jq parse PASS)"
Lookup request row
local pg_row req_id req_status req_mid pg_row="$(run_pg_rw "SELECT id || '|' || status || '|' || COALESCE(manifest_id::text, '') FROM context_pack_requests WHERE detail->>'build_id' = '${build_id_live}' ORDER BY id DESC LIMIT 1")" if [[ -z "$pg_row" ]]; then log_fatal "repair: không tìm thấy context_pack_requests cho build_id=${build_id_live}" exit 1 fi IFS='|' read -r req_id req_status req_mid <<< "$pg_row" log_info "repair: request id=${req_id} status=${req_status} manifest_id=${req_mid:-NULL}"
if [[ "$req_status" == "done" && -n "$req_mid" ]]; then log_ok "repair: already complete (manifest_id=${req_mid}) — nothing to do" return 0 fi if [[ "$req_status" == "failed" ]]; then log_warn "repair: request=failed — ROLLBACK path (Phase 5 sẽ implement); exit" exit 0 fi
Gap 7f→8: 7f đã INSERT manifest nhưng release() chưa UPDATE status → close ra
if [[ "$req_status" == "running" && -n "$req_mid" ]]; then log_info "repair: state=post_db_pre_release (manifest_id=${req_mid} có, status vẫn running) → close release" MANIFEST_ID="$req_mid" REQUEST_ID="$req_id" release log_ok "repair CLOSE DONE manifest_id=${MANIFEST_ID} request_id=${REQUEST_ID}" return 0 fi if [[ "$req_status" != "running" || -n "$req_mid" ]]; then log_fatal "repair: ambiguous state req_status=${req_status} manifest_id=${req_mid}" exit 1 fi
FINALIZE path: state post_fs_pre_db → replay 7e + 7f + release
log_info "repair: state=post_fs_pre_db → FINALIZE (run 7e + 7f + release)" REQUEST_ID="$req_id" BUILD_ID="$build_id_live" TRIGGER_SOURCE="$(jq -r '.trigger_source' "$manifest_file")" GIT_COMMIT="$(jq -r '.git_commit' "$manifest_file")" LAW_COUNT="$(jq -r '.counts.law' "$manifest_file")" DOT_COUNT="$(jq -r '.counts.dot' "$manifest_file")" ENTITY_COUNT="$(jq -r '.counts.entity' "$manifest_file")" SPECIES_COUNT="$(jq -r '.counts.species' "$manifest_file")" DB_COUNT="$(jq -r '.counts.db' "$manifest_file")"
publish_kb publish_db release log_ok "repair FINALIZE DONE manifest_id=${MANIFEST_ID} request_id=${REQUEST_ID}" }
# =============================================================================
# Main flow
# =============================================================================
main() { parse_args "$@"
log_info "${SCRIPT_NAME} v${VERSION}" log_info "trigger_source=${TRIGGER_SOURCE} dry_run=${DRY_RUN} repair=${REPAIR} verbose=${VERBOSE}"
if [[ $REPAIR -eq 1 ]]; then log_info "=== REPAIR MODE (§6 Bước 7g) ===" repair_publish log_ok "${SCRIPT_NAME} (repair) completed" exit 0 fi
log_info "=== Bước 1 PRECHECK ===" precheck
log_info "=== Bước 2 TRY-LOCK ===" try_lock
log_info "=== Bước 3 QUERY PG ===" query_pg
log_info "=== Bước 4 SCAN FS ===" scan_fs
log_info "=== Bước 5 GENERATE + 2 CHECKSUM ===" generate
log_info "=== Bước 6 VALIDATE ===" validate
log_info "=== Bước 7 PUBLISH ===" publish
log_info "=== Bước 8 RELEASE ===" release
log_ok "${SCRIPT_NAME} completed (trigger=${TRIGGER_SOURCE} dry_run=${DRY_RUN})" exit 0 }
main "$@"