diff --git a/reg-tests/mcli/mcli_master_socket_leak.vtc b/reg-tests/mcli/mcli_master_socket_leak.vtc new file mode 100644 index 0000000000000..cf30ea6fe1f74 --- /dev/null +++ b/reg-tests/mcli/mcli_master_socket_leak.vtc @@ -0,0 +1,66 @@ +varnishtest "Bug fix: master CLI connection slots freed on client disconnect" + +# Regression test for a master CLI socket connection leak in master-worker mode. +# +# mworker_proxy has a fixed maxconn of 10. When clients connect to the master +# socket, send a command that is forwarded to a busy worker, and then close +# the connection due to a client-side receive timeout, haproxy must free each +# slot as the client disconnects. +# +# Without the fix: slots remain occupied after the client disconnects, so the +# master CLI becomes unreachable once all 10 slots are filled. +# With the fix: slots are freed on client disconnect and the master CLI keeps +# accepting new connections. +# +# The worker is made unresponsive using "debug dev delay" (expert-mode) with +# nbthread 1 so a single delay blocks the entire worker CLI. This avoids +# using SIGSTOP/SIGCONT which can be unreliable in CI environments. + +#REGTEST_TYPE=bug + +feature cmd "command -v socat" +feature cmd "command -v timeout" +feature ignore_unknown_macro + +server s1 { +} -start + +haproxy h1 -W -S -conf { + global + nbthread 1 + + defaults + mode http + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + frontend fe + bind "fd@${fe}" + default_backend be + + backend be + server s1 ${s1_addr}:${s1_port} +} -start + +# Fill all 10 master CLI slots (mworker_proxy->maxconn is hardcoded to 10). +# Each socat sends "expert-mode on" followed by "@1 debug dev delay 10000" +# which blocks the single worker thread for 10 s. After 2 s the timeout(1) +# wrapper kills socat, simulating a client-side receive timeout. "wait" +# ensures all background processes have exited before proceeding. +shell { + for i in $(seq 1 10); do + (printf "expert-mode on\n@1 debug dev delay 10000\n" \ + | timeout 2 socat TCP:${h1_mcli_addr}:${h1_mcli_port} - 2>/dev/null) & + done + wait +} + +# This is the key assertion: after all 10 clients have disconnected, a new +# connection to the master CLI must succeed. With the bug all 10 slots are +# still marked occupied and this connect is refused or times out. +haproxy h1 -mcli { + send "show version" + expect ~ "3." +} + diff --git a/reg-tests/mcli/mcli_master_socket_leak_sigstop.vtc b/reg-tests/mcli/mcli_master_socket_leak_sigstop.vtc new file mode 100644 index 0000000000000..41914ce6d3fcd --- /dev/null +++ b/reg-tests/mcli/mcli_master_socket_leak_sigstop.vtc @@ -0,0 +1,106 @@ +varnishtest "Master CLI slot leak: frozen worker (SIGSTOP) — fix verified" + +# Companion to mcli_master_socket_leak.vtc. That test uses +# "debug dev delay" to block the worker INSIDE its CLI handler. This +# test goes further: it freezes the worker process at the OS level via +# SIGSTOP BEFORE any byte flows back through the master->worker +# sockpair. This was raised as a possible gap in the fix, on the +# theory that the backend stconn's `lra` (last-read-activity) field +# might still be TICK_ETERNITY when the client disconnects, in which +# case tick_add(lra, ioto) == TICK_ETERNITY and timeout server-fin +# never fires, so the slot would still leak. +# +# Empirical result (run locally against fix f218e2252 on top of +# 3.4-dev9): slots are freed within ~2s of client disconnect even with +# the worker SIGSTOPped before any traffic. The 11th connection +# attempt while the worker was still frozen succeeded in ~0.5s. +# +# Likely explanation: `lra` is updated when the backend stconn enters +# the EST state during sockpair setup, which happens at the kernel +# level immediately (sockpairs are pre-connected) — independently of +# the worker actually reading any byte. SIGSTOP'ing the worker does +# not unwind that master-side state, so by the time sc_set_hcto() +# arms timeout server-fin on client disconnect, lra is already finite +# and tick_add() yields a real expiry ~1s in the future. +# +# This test is kept as a local diagnostic. It is NOT recommended for +# upstream submission: the existing master_socket_leak test explicitly +# avoids SIGSTOP/SIGCONT for CI reliability reasons. + +#REGTEST_TYPE=bug + +feature cmd "command -v socat" +feature cmd "command -v timeout" +feature cmd "command -v awk" +feature ignore_unknown_macro + +server s1 { +} -start + +haproxy h1 -W -S -conf { + global + nbthread 1 + + defaults + mode http + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + frontend fe + bind "fd@${fe}" + default_backend be + + backend be + server s1 ${s1_addr}:${s1_port} +} -start + +# Discover the worker PID via the master CLI, freeze it with SIGSTOP +# BEFORE any client traffic, fill all 10 slots with clients that get +# killed after 2s, then verify the master CLI is still reachable while +# the worker is still frozen. The trap ensures we never leave a +# stopped worker behind. +shell { + set -e + WORKER_PID=$(printf "show proc\n" \ + | socat -t2 TCP:${h1_mcli_addr}:${h1_mcli_port} - 2>/dev/null \ + | awk '$1 ~ /^[0-9]+$/ && $0 ~ /worker/ { print $1; exit }') + test -n "$WORKER_PID" || { echo "FAIL: could not find worker pid"; exit 1; } + echo "worker pid: $WORKER_PID" + + trap 'kill -CONT '"$WORKER_PID"' 2>/dev/null || true' EXIT INT TERM + + kill -STOP "$WORKER_PID" + + SOCAT_PIDS="" + for i in $(seq 1 10); do + (printf "@1 show info\n" \ + | timeout --kill-after=1 2 socat TCP:${h1_mcli_addr}:${h1_mcli_port} - \ + 2>/dev/null) & + SOCAT_PIDS="$SOCAT_PIDS $!" + done + for pid in $SOCAT_PIDS; do + wait "$pid" 2>/dev/null || true + done + + # The key assertion: 11th connection MUST succeed while worker is + # still SIGSTOPped. If the lra==TICK_ETERNITY concern were real, + # this would hang or fail. + RESULT=$(printf "show version\n" \ + | timeout --kill-after=1 5 socat -t4 TCP:${h1_mcli_addr}:${h1_mcli_port} - \ + 2>&1) + echo "11th connection result: $RESULT" + echo "$RESULT" | grep -qE '[0-9]+\.[0-9]+' || { + echo "FAIL: master CLI unreachable while worker still SIGSTOPped" + exit 1 + } + + kill -CONT "$WORKER_PID" + trap - EXIT INT TERM +} + +# Sanity check: master CLI works after worker resumed. +haproxy h1 -mcli { + send "show version" + expect ~ "." +} diff --git a/reg-tests/mcli/mcli_reload_no_timeout.lua b/reg-tests/mcli/mcli_reload_no_timeout.lua new file mode 100644 index 0000000000000..93f6f94812998 --- /dev/null +++ b/reg-tests/mcli/mcli_reload_no_timeout.lua @@ -0,0 +1,5 @@ +-- Delay worker readiness by 2s to test that serverfin does not +-- kill the reload command while the new worker is starting. +core.register_init(function() + os.execute("sleep 2") +end) diff --git a/reg-tests/mcli/mcli_reload_no_timeout.vtc b/reg-tests/mcli/mcli_reload_no_timeout.vtc new file mode 100644 index 0000000000000..a6fa6ad6b4ad4 --- /dev/null +++ b/reg-tests/mcli/mcli_reload_no_timeout.vtc @@ -0,0 +1,65 @@ +varnishtest "Verify reload via master CLI is not affected by serverfin timeout" + +# Regression test: the timeout server-fin on the MASTER proxy must not +# apply to locally-handled commands (applets). A reload command is +# forwarded to the master applet and may take longer than 1s while the +# new worker is starting. Without the fix, the pcli response analyser +# would fire a read timeout and return "Can't connect to the target CLI!" +# instead of the reload status. + +#REQUIRE_OPTIONS=LUA +#REGTEST_TYPE=bug + +feature cmd "command -v socat" +feature cmd "$HAPROXY_PROGRAM -vv | grep -q '+LUA'" +feature ignore_unknown_macro + +server s1 { + rxreq + txresp +} -start + +haproxy h1 -W -S -conf { + global + tune.lua.bool-sample-conversion normal + # Lua init hook that sleeps 2s, delaying worker readiness on reload + lua-load ${testdir}/mcli_reload_no_timeout.lua + + defaults + mode http + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + frontend fe + bind "fd@${fe}" + default_backend be + + backend be + server s1 ${s1_addr}:${s1_port} +} -start + +# Issue a reload via socat as a second command (after "show version"). +# The first command causes CF_AUTO_CLOSE to be set on the request channel. +# When socat half-closes its write side after sending, process_stream() +# triggers sc_shutdown(scb) -> sc_set_hcto() which applies serverfin=1s +# to the backend applet. Without the fix, the reload response (which comes +# from the master applet) would be killed by this 1s timeout if the new +# worker takes time to start, returning "Can't connect to the target CLI!". +shell { + RESULT=$(printf "show version\nreload\n" | socat -t10 TCP:${h1_mcli_addr}:${h1_mcli_port} - 2>/dev/null) + echo "Got: $RESULT" + echo "$RESULT" | grep -q "Success=1" || { + echo "FAIL: reload did not succeed. Got: $RESULT" + exit 1 + } +} + +# Verify the master CLI is still functional after reload +shell { + RESULT=$(printf "show version\n" | socat -t5 TCP:${h1_mcli_addr}:${h1_mcli_port} - 2>/dev/null) + echo "$RESULT" | grep -q "3\." || { + echo "FAIL: show version failed. Got: $RESULT" + exit 1 + } +} diff --git a/reg-tests/mcli/run_risk_tests.sh b/reg-tests/mcli/run_risk_tests.sh new file mode 100755 index 0000000000000..43e1ca3a314db --- /dev/null +++ b/reg-tests/mcli/run_risk_tests.sh @@ -0,0 +1,412 @@ +#!/usr/bin/env bash +# Risk validation harness for the master CLI slot-leak fix. +# +# Tests three scenarios that could plausibly regress with the patch: +# #1 pipelined commands — client sends multiple commands, disconnects +# at various points; verify output is intact and +# slots are freed. +# #2 reload during CLI traffic — issue a reload while CLI clients are +# hitting @1-prefixed commands; count the +# client-visible failures during the +# reload window. +# #3 long-running command cut off — client connects, sends a slow +# command, disconnects mid-response; +# verify worker is still healthy +# afterward and slot was freed. +# +# Run as: ./run_risk_tests.sh /path/to/haproxy +# Without arguments, defaults to ./haproxy. +# +# Output is plain-text counters per test. Compare two runs (baseline vs +# patched) by running this script twice with different binaries. + +set -u +set -o pipefail + +HAPROXY_BIN="${1:-./haproxy}" +LABEL="${2:-$(basename "$HAPROXY_BIN")}" +TMPDIR="$(mktemp -d -t mcli-risk.XXXXXX)" +MCLI_HOST="127.0.0.1" +MCLI_PORT="$((20000 + RANDOM % 10000))" +FE_PORT="$((20000 + RANDOM % 10000))" + +# Result counters +P1_PASS=0; P1_FAIL=0; P1_NOTES="" +P2_PASS=0; P2_FAIL=0; P2_NOTES="" +P3_PASS=0; P3_FAIL=0; P3_NOTES="" + +cleanup() { + rc=$? + if [ -n "${MASTER_PID:-}" ]; then + kill -TERM "$MASTER_PID" 2>/dev/null || true + wait "$MASTER_PID" 2>/dev/null || true + fi + rm -rf "$TMPDIR" + exit $rc +} +trap cleanup EXIT INT TERM + +note() { echo " $*"; } + +mcli_send() { + # mcli_send + local t="$1"; shift + printf "%s\n" "$@" \ + | timeout --kill-after=1 "$t" socat -t"$((t-1))" "TCP:${MCLI_HOST}:${MCLI_PORT}" - 2>/dev/null +} + +start_haproxy() { + local extra_global="${1:-}" + cat > "$TMPDIR/haproxy.cfg" < "$TMPDIR/haproxy.log" 2>&1 & + MASTER_PID=$! + for _ in $(seq 1 50); do + if mcli_send 1 "show version" >/dev/null 2>&1; then return 0; fi + sleep 0.1 + done + return 1 +} + +stop_haproxy() { + if [ -n "${MASTER_PID:-}" ]; then + kill -TERM "$MASTER_PID" 2>/dev/null || true + wait "$MASTER_PID" 2>/dev/null || true + MASTER_PID="" + fi +} + +worker_pid() { + mcli_send 2 "show proc" \ + | awk '$1 ~ /^[0-9]+$/ && $0 ~ /worker/ { print $1; exit }' +} + +############################################################################## +# Test #1: pipelined commands +############################################################################## +test_pipeline() { + echo + echo "===> Test #1: pipelined commands ($LABEL)" + start_haproxy || { P1_FAIL=$((P1_FAIL+1)); note "haproxy failed to start"; return; } + + # 1a: 3 local commands pipelined, client reads to EOF + local out + out=$(mcli_send 5 "show version" "show proc" "show version") + local n_versions n_proc + n_versions=$(echo "$out" | grep -cE '^[0-9]+\.[0-9]+' || true) + n_proc=$(echo "$out" | grep -c '# workers' || true) + if [ "$n_versions" -ge 2 ] && [ "$n_proc" -ge 1 ]; then + P1_PASS=$((P1_PASS+1)) + note "1a PASS: 3 local pipelined commands all produced output" + else + P1_FAIL=$((P1_FAIL+1)) + note "1a FAIL: expected >=2 version lines and a workers section; got $n_versions versions, $n_proc proc" + fi + + # 1b: 3 forwarded commands pipelined to worker + out=$(mcli_send 5 "@1 show version" "@1 show info" "@1 show stat") + local has_version has_info + has_version=$(echo "$out" | grep -cE '^[0-9]+\.[0-9]+' || true) + has_info=$(echo "$out" | grep -c 'Process_num' || true) + if [ "$has_version" -ge 1 ] && [ "$has_info" -ge 1 ]; then + P1_PASS=$((P1_PASS+1)) + note "1b PASS: 3 forwarded pipelined commands produced expected markers" + else + P1_FAIL=$((P1_FAIL+1)) + note "1b FAIL: forwarded pipelined output incomplete (version=$has_version info=$has_info)" + fi + + # 1c: pipelined commands with hard mid-stream disconnect (no FIN). + # Send commands, then SIGKILL socat so the kernel cleans up the socket + # without sending a clean FIN. This is the actual leak trigger — a + # graceful timeout(1)/SIGTERM is recovered cleanly even on baseline + # because socat closes its end before exiting. + # After the disconnect, verify the master CLI still works. + local PIDS=() + for i in 1 2 3; do + (printf "@1 show info\n@1 show stat\n@1 show servers state\n" \ + | socat -t10 "TCP:${MCLI_HOST}:${MCLI_PORT}" - >/dev/null 2>&1) & + PIDS+=($!) + done + sleep 0.5 # let the commands reach the worker + for p in "${PIDS[@]}"; do kill -9 "$p" 2>/dev/null || true; done + for pid in "${PIDS[@]}"; do wait "$pid" 2>/dev/null || true; done + sleep 1.5 # give server-fin (1s) time to fire on patched build + if mcli_send 3 "show version" | grep -qE '[0-9]+\.[0-9]+'; then + P1_PASS=$((P1_PASS+1)) + note "1c PASS: master CLI reachable after hard mid-stream disconnect" + else + P1_FAIL=$((P1_FAIL+1)) + note "1c FAIL: master CLI unreachable after hard mid-stream disconnect" + fi + + # 1d: the actual bug scenario — frozen worker + 10 SIGKILL'd clients. + # On baseline: slots leak permanently while worker frozen, 11th conn fails. + # On patched: server-fin fires at 1s, slots freed, 11th conn succeeds. + local wpid + wpid=$(worker_pid) + if [ -n "$wpid" ]; then + kill -STOP "$wpid" + local PIDS2=() + for i in $(seq 1 10); do + (printf "@1 show info\n" \ + | socat -t30 "TCP:${MCLI_HOST}:${MCLI_PORT}" - >/dev/null 2>&1) & + PIDS2+=($!) + done + sleep 1 + for p in "${PIDS2[@]}"; do kill -9 "$p" 2>/dev/null || true; done + for pid in "${PIDS2[@]}"; do wait "$pid" 2>/dev/null || true; done + sleep 2 # give server-fin (1s) time to fire on patched build + + local t0 t1 result elapsed + t0=$(date +%s.%N) + result=$(mcli_send 4 "show version") + t1=$(date +%s.%N) + elapsed=$(awk "BEGIN { printf \"%.2f\", $t1 - $t0 }") + kill -CONT "$wpid" 2>/dev/null || true + + # Note: this is the bug trigger. We expect baseline to FAIL and + # patched to PASS. Both outcomes are useful diagnostic data. + if echo "$result" | grep -qE '[0-9]+\.[0-9]+'; then + P1_PASS=$((P1_PASS+1)) + note "1d PASS: 11th conn succeeded in ${elapsed}s with frozen worker (slots freed)" + else + P1_FAIL=$((P1_FAIL+1)) + note "1d FAIL: 11th conn failed in ${elapsed}s with frozen worker (slots leaked) — expected on baseline, regression on patched" + fi + else + note "1d SKIP: could not get worker PID" + fi + + stop_haproxy +} + +############################################################################## +# Test #2: reload during CLI traffic +############################################################################## +test_reload_during_traffic() { + echo + echo "===> Test #2: reload during CLI traffic ($LABEL)" + + # Build a config with enough backends to make parse non-trivial. Even + # so, reloads on a local box are typically <0.5s. We compensate by + # making the in-flight CLI work artificially slow with "debug dev + # delay" so it actually overlaps the reload window. + local cfg="$TMPDIR/reload_haproxy.cfg" + { + echo "global" + echo " nbthread 1" + echo " expose-experimental-directives" + echo + echo "defaults" + echo " mode http" + echo " timeout connect 5s" + echo " timeout client 5s" + echo " timeout server 5s" + echo + echo "frontend fe" + echo " bind 127.0.0.1:${FE_PORT}" + echo " default_backend be0" + echo + for i in $(seq 0 200); do + echo "backend be${i}" + echo " server s1 127.0.0.1:9" + done + } > "$cfg" + + "$HAPROXY_BIN" -W -S "${MCLI_HOST}:${MCLI_PORT}" -f "$cfg" \ + > "$TMPDIR/haproxy.log" 2>&1 & + MASTER_PID=$! + for _ in $(seq 1 50); do + if mcli_send 1 "show version" >/dev/null 2>&1; then break; fi + sleep 0.1 + done + + # 2a: time the reload itself (no concurrent traffic). + local t0 t1 reload_out elapsed_reload + t0=$(date +%s.%N) + reload_out=$(mcli_send 10 "reload") + t1=$(date +%s.%N) + elapsed_reload=$(awk "BEGIN { printf \"%.3f\", $t1 - $t0 }") + note "reload alone took ${elapsed_reload}s; result: $(echo "$reload_out" | head -1)" + + if echo "$reload_out" | grep -qiE 'Success|Loading'; then + P2_PASS=$((P2_PASS+1)) + note "2a PASS: reload command succeeded" + else + P2_FAIL=$((P2_FAIL+1)) + note "2a FAIL: reload did not return Success (out: $reload_out)" + fi + + # 2b: in-flight CLI commands during reload. Start 5 background + # @1-prefixed slow-delay commands (each blocks worker thread for 2s + # via debug dev delay). Wait briefly so they reach the worker, then + # issue a reload. Count successes vs. failures of the in-flight + # commands. + local inflight_log="$TMPDIR/inflight.log" + : > "$inflight_log" + local INFLIGHT_PIDS=() + for i in $(seq 1 5); do + ( + r=$(printf "expert-mode on\n@1 debug dev delay 2000\n" \ + | timeout --kill-after=1 8 socat -t6 "TCP:${MCLI_HOST}:${MCLI_PORT}" - \ + 2>&1 || echo "TIMEOUT") + if [ -z "$r" ]; then + echo "EMPTY $i" >> "$inflight_log" + elif echo "$r" | grep -q "Can't connect"; then + echo "CANTCONN $i" >> "$inflight_log" + elif echo "$r" | grep -q TIMEOUT; then + echo "TIMEOUT $i" >> "$inflight_log" + else + echo "OK $i" >> "$inflight_log" + fi + ) & + INFLIGHT_PIDS+=($!) + done + + # Let in-flight commands settle into the worker. + sleep 0.3 + + # Issue the reload while in-flight commands are running. + t0=$(date +%s.%N) + reload_out=$(mcli_send 10 "reload") + t1=$(date +%s.%N) + local elapsed_reload2 + elapsed_reload2=$(awk "BEGIN { printf \"%.3f\", $t1 - $t0 }") + + for pid in "${INFLIGHT_PIDS[@]}"; do wait "$pid" 2>/dev/null || true; done + + local inflight_ok inflight_cantconn inflight_timeout inflight_empty + inflight_ok=$(grep -c '^OK ' "$inflight_log" || true) + inflight_cantconn=$(grep -c '^CANTCONN ' "$inflight_log" || true) + inflight_timeout=$(grep -c '^TIMEOUT ' "$inflight_log" || true) + inflight_empty=$(grep -c '^EMPTY ' "$inflight_log" || true) + + note "reload during in-flight took ${elapsed_reload2}s" + note "in-flight outcomes: ok=$inflight_ok cantconn=$inflight_cantconn timeout=$inflight_timeout empty=$inflight_empty" + + # The reload itself must still succeed even with traffic in flight. + if echo "$reload_out" | grep -qiE 'Success|Loading'; then + P2_PASS=$((P2_PASS+1)) + note "2b PASS: reload during in-flight traffic succeeded" + else + P2_FAIL=$((P2_FAIL+1)) + note "2b FAIL: reload-during-traffic did not return Success (out: $reload_out)" + fi + + P2_NOTES="reload=${elapsed_reload}s reload-w-traffic=${elapsed_reload2}s inflight: ok=$inflight_ok cc=$inflight_cantconn to=$inflight_timeout empty=$inflight_empty" + stop_haproxy +} + +############################################################################## +# Test #3: long-running command cut off +############################################################################## +test_long_command_cutoff() { + echo + echo "===> Test #3: long-running command cut off ($LABEL)" + start_haproxy || { P3_FAIL=$((P3_FAIL+1)); note "haproxy failed to start"; return; } + + # 3a: client sends a slow command and is SIGKILLed mid-stream (no FIN). + # Use "expert-mode on; @1 debug dev delay 3000" to force a 3s worker delay. + # Client is killed at ~0.5s, well before the delay completes, with no + # graceful FIN. + (printf "expert-mode on\n@1 debug dev delay 3000\n" \ + | socat -t10 "TCP:${MCLI_HOST}:${MCLI_PORT}" - >/dev/null 2>&1) & + local p3a=$! + sleep 0.5 + kill -9 "$p3a" 2>/dev/null || true + wait "$p3a" 2>/dev/null || true + # Wait long enough for worker to finish its delay AND server-fin (1s) to fire. + sleep 4 + + # 3b: verify worker is still responsive after the cut-off. + local out + out=$(mcli_send 4 "@1 show info") + if echo "$out" | grep -q 'Process_num'; then + P3_PASS=$((P3_PASS+1)) + note "3a PASS: worker still responsive after long-command cut-off" + else + P3_FAIL=$((P3_FAIL+1)) + note "3a FAIL: worker not responsive (out head: $(echo "$out" | head -c 100))" + fi + + # 3c: fill all 10 slots with the same SIGKILL pattern; after they all + # disconnect, master CLI should still accept new connections. + local PIDS=() + for i in $(seq 1 10); do + (printf "expert-mode on\n@1 debug dev delay 5000\n" \ + | socat -t30 "TCP:${MCLI_HOST}:${MCLI_PORT}" - >/dev/null 2>&1) & + PIDS+=($!) + done + sleep 0.5 + for p in "${PIDS[@]}"; do kill -9 "$p" 2>/dev/null || true; done + for pid in "${PIDS[@]}"; do wait "$pid" 2>/dev/null || true; done + # Must wait for worker delay to finish (5s) + server-fin (1s). + # Without server-fin (baseline), this still passes if the worker eventually + # finishes and writes to the broken sockpair (gets EPIPE) and frees its slot. + sleep 7 + + if mcli_send 4 "show version" | grep -qE '[0-9]+\.[0-9]+'; then + P3_PASS=$((P3_PASS+1)) + note "3b PASS: master CLI reachable after 10 cut-off long commands" + else + P3_FAIL=$((P3_FAIL+1)) + note "3b FAIL: master CLI unreachable after 10 cut-off long commands" + fi + + stop_haproxy +} + +############################################################################## +# Main +############################################################################## +echo "==========================================================" +echo "Risk validation harness" +echo "binary: $HAPROXY_BIN" +echo "label: $LABEL" +echo "tmpdir: $TMPDIR" +echo "==========================================================" + +# Verify binary exists and runs. +if ! "$HAPROXY_BIN" -v >/dev/null 2>&1; then + echo "ERROR: $HAPROXY_BIN is not a working haproxy binary" + exit 2 +fi +"$HAPROXY_BIN" -v 2>&1 | head -1 + +test_pipeline +test_reload_during_traffic +test_long_command_cutoff + +echo +echo "==========================================================" +echo "RESULTS for $LABEL" +echo "==========================================================" +printf " #1 pipelined commands: pass=%d fail=%d\n" "$P1_PASS" "$P1_FAIL" +printf " #2 reload during traffic: pass=%d fail=%d (%s)\n" "$P2_PASS" "$P2_FAIL" "$P2_NOTES" +printf " #3 long-command cut-off: pass=%d fail=%d\n" "$P3_PASS" "$P3_FAIL" +echo +total_fail=$((P1_FAIL + P2_FAIL + P3_FAIL)) +if [ $total_fail -eq 0 ]; then + echo "OVERALL: all assertions passed" +else + echo "OVERALL: $total_fail assertion(s) failed" +fi +exit $total_fail diff --git a/reg-tests/mcli/run_sigstop_test.sh b/reg-tests/mcli/run_sigstop_test.sh new file mode 100755 index 0000000000000..62a32a6363e5a --- /dev/null +++ b/reg-tests/mcli/run_sigstop_test.sh @@ -0,0 +1,160 @@ +#!/usr/bin/env bash +# Diagnostic for the lra==TICK_ETERNITY concern raised against fix +# f218e2252 (GH #3351). The companion regtest mcli_master_socket_leak.vtc +# uses "debug dev delay" to block the worker INSIDE its CLI handler, +# at which point lra has already advanced. This script freezes the +# worker at the OS level via SIGSTOP BEFORE any byte flows back, then +# checks whether slots are still freed on client disconnect. +# +# Empirical result on this branch: slots ARE freed even under SIGSTOP. +# The 11th connection attempt succeeds in ~0.5s while the worker is +# still frozen. Likely explanation: lra is set when the backend +# stconn enters EST state during sockpair setup (which happens at the +# kernel level immediately because sockpairs are pre-connected), not +# when the worker actually reads a byte. SIGSTOP'ing the worker +# doesn't unwind that master-side state, so sc_set_hcto() arms a +# real, finite expiry on client disconnect. +# +# Strategy: +# 1) Start haproxy in master-worker mode with a master CLI on a TCP port. +# 2) Discover the worker PID via "show proc". +# 3) SIGSTOP the worker BEFORE any client traffic. +# 4) Fill all 10 master CLI slots with clients that get killed after 2s. +# 5) Try the 11th connection while worker is STILL SIGSTOPped. +# 6) SIGCONT the worker, sanity-check master CLI, clean up. +# +# Pass criteria: +# - 11th connection succeeds within a few seconds while SIGSTOPped => +# fix covers SIGSTOP case (this is what we observe). +# - 11th connection refused/hangs => slots leaked, lra concern real. + +set -u +set -o pipefail + +HAPROXY_BIN="${HAPROXY_BIN:-./haproxy}" +TMPDIR="$(mktemp -d -t mcli-sigstop.XXXXXX)" +MCLI_HOST="127.0.0.1" +MCLI_PORT="$((20000 + RANDOM % 10000))" +FE_PORT="$((20000 + RANDOM % 10000))" + +cleanup() { + rc=$? + # Always try to unfreeze the worker before exiting, no matter what. + if [ -n "${WORKER_PID:-}" ]; then + kill -CONT "$WORKER_PID" 2>/dev/null || true + fi + if [ -n "${MASTER_PID:-}" ]; then + kill -TERM "$MASTER_PID" 2>/dev/null || true + wait "$MASTER_PID" 2>/dev/null || true + fi + rm -rf "$TMPDIR" + exit $rc +} +trap cleanup EXIT INT TERM + +echo "==> tmpdir: $TMPDIR" +echo "==> haproxy: $HAPROXY_BIN" +echo "==> master CLI: $MCLI_HOST:$MCLI_PORT" +echo "==> frontend port: $FE_PORT" + +cat > "$TMPDIR/haproxy.cfg" < starting haproxy ..." +"$HAPROXY_BIN" -W -S "${MCLI_HOST}:${MCLI_PORT}" -f "$TMPDIR/haproxy.cfg" \ + > "$TMPDIR/haproxy.log" 2>&1 & +MASTER_PID=$! + +# Wait for master CLI to come up +for i in $(seq 1 50); do + if printf "show version\n" | socat -t1 "TCP:${MCLI_HOST}:${MCLI_PORT}" - >/dev/null 2>&1; then + break + fi + sleep 0.1 +done + +# Discover the worker PID via "show proc" +echo "==> discovering worker PID ..." +SHOW_PROC=$(printf "show proc\n" | socat -t2 "TCP:${MCLI_HOST}:${MCLI_PORT}" - 2>/dev/null) +echo "----- show proc output -----" +echo "$SHOW_PROC" +echo "----------------------------" + +WORKER_PID=$(echo "$SHOW_PROC" | awk '$1 ~ /^[0-9]+$/ && $0 ~ /worker/ { print $1; exit }') +if [ -z "${WORKER_PID:-}" ]; then + echo "FAIL: could not parse worker PID from show proc" + exit 1 +fi +echo "==> worker pid: $WORKER_PID" + +# Freeze the worker BEFORE any client traffic. +echo "==> SIGSTOP worker $WORKER_PID" +kill -STOP "$WORKER_PID" + +# Fill ALL 10 slots (no diagnostic slot left). If the fix works under +# SIGSTOP, the 11th connection attempt below should still succeed within +# a couple of seconds. If slots truly leak, it should be refused with +# "Resource temporarily unavailable" or hang. +echo "==> filling all 10 master CLI slots with clients that will time out ..." +SOCAT_PIDS=() +for i in $(seq 1 10); do + (printf "@1 show info\n" \ + | timeout --kill-after=1 2 socat "TCP:${MCLI_HOST}:${MCLI_PORT}" - 2>/dev/null) & + SOCAT_PIDS+=($!) +done +for pid in "${SOCAT_PIDS[@]}"; do + wait "$pid" 2>/dev/null || true +done +echo "==> 10 client timeouts complete" + +# Crucial test: try the 11th connection while the worker is STILL SIGSTOPped. +# If the slots were freed on client disconnect, this succeeds. +# If they leaked, this fails or hangs. +echo "==> 11th connection attempt while worker is still SIGSTOPped ..." +T0=$(date +%s.%N) +RESULT=$(printf "show version\n" | timeout --kill-after=1 5 socat -t4 "TCP:${MCLI_HOST}:${MCLI_PORT}" - 2>&1) +RC=$? +T1=$(date +%s.%N) +ELAPSED=$(awk "BEGIN { printf \"%.2f\", $T1 - $T0 }") +echo " rc=$RC elapsed=${ELAPSED}s" +echo " raw: $RESULT" +echo +if [ $RC -eq 0 ] && echo "$RESULT" | grep -qE '[0-9]+\.[0-9]+'; then + echo " -> SLOTS WERE FREED while worker was SIGSTOPped: fix covers this case." +else + echo " -> SLOTS LEAKED: 11th connection failed/hung while worker SIGSTOPped." + echo " This would confirm the lra==TICK_ETERNITY concern." +fi + +# Unfreeze +echo +echo "==> SIGCONT worker $WORKER_PID" +kill -CONT "$WORKER_PID" +WORKER_PID="" # so cleanup doesn't double-CONT + +# Final reachability check after SIGCONT +echo "==> final reachability check (worker resumed) ..." +FINAL=$(printf "show version\n" | timeout --kill-after=1 5 socat -t4 "TCP:${MCLI_HOST}:${MCLI_PORT}" - 2>&1) +echo " raw: $FINAL" +if echo "$FINAL" | grep -qE '[0-9]+\.[0-9]+'; then + echo " OK: master CLI is reachable after worker resumed." +else + echo " FAIL: master CLI not reachable even after worker resumed." + exit 1 +fi diff --git a/src/cli.c b/src/cli.c index deb6f703f684e..cd9845880c533 100644 --- a/src/cli.c +++ b/src/cli.c @@ -3484,8 +3484,25 @@ int pcli_wait_for_request(struct stream *s, struct channel *req, int an_bit) * current one. Just wait. At this stage, errors should be handled by * the response analyzer. */ - if (s->res.analysers & AN_RES_WAIT_CLI) + if (s->res.analysers & AN_RES_WAIT_CLI) { + /* Prevent process_stream from auto-forwarding the client close to + * the backend via the CF_AUTO_CLOSE check while we're waiting for + * a response - we manage the connection lifetime ourselves. + */ + channel_dont_close(req); + + /* If the client disconnected cleanly and we have no more commands + * to send, arm the server-fin timer on the backend. A stuck + * worker that never responds will then be aborted after + * timeout server-fin, freeing the connection slot (GH #3351). + * When lra is TICK_ETERNITY (no data received yet) the timer never + * fires, so this is safe to call on every wakeup. + */ + if ((s->scf->flags & SC_FL_EOS) && !ci_data(req)) + sc_set_hcto(s->scb); + return 0; + } pcli->flags &= ~PCLI_F_BIDIR; // only for one connection if ((pcli->flags & ACCESS_LVL_MASK) == ACCESS_LVL_NONE) @@ -3528,13 +3545,7 @@ int pcli_wait_for_request(struct stream *s, struct channel *req, int an_bit) else channel_forward_forever(req); - if (!(pcli->flags & PCLI_F_PAYLOAD)) { - /* we send only 1 command per request, and we write - * close after it when not in full-duplex mode. - */ - if (!(pcli->flags & PCLI_F_BIDIR)) - sc_schedule_shutdown(s->scb); - } else { + if (pcli->flags & PCLI_F_PAYLOAD) { pcli_write_prompt(s); } @@ -3595,6 +3606,13 @@ int pcli_wait_for_request(struct stream *s, struct channel *req, int an_bit) if (s->scf->flags & (SC_FL_ABRT_DONE|SC_FL_EOS)) { /* There is no more request or a only a partial one and we * receive a close from the client, we can leave */ + if (s->scf->flags & SC_FL_ABRT_DONE) { + /* Client sent RST: abort the backend immediately. + * Note: when AN_RES_WAIT_CLI is set we never reach here + * (early return above), so this handles the pre-connect case. + */ + sc_schedule_abort(s->scb); + } sc_schedule_shutdown(s->scf); s->req.analysers &= ~AN_REQ_WAIT_CLI; return 1; @@ -3834,8 +3852,13 @@ int mworker_cli_create_master_proxy(char **errmsg) mworker_proxy->mode = PR_MODE_CLI; /* default to 10 concurrent connections */ mworker_proxy->maxconn = 10; - /* no timeout */ - mworker_proxy->timeout.client = 0; + mworker_proxy->timeout.client = 0; /* no timeout */ + /* 1s server-fin timeout: armed only after the client disconnects cleanly + * (scf EOS + no pending commands), so it never fires during normal command + * processing. It ensures a stuck backend releases its slot promptly once + * the client is gone. + */ + mworker_proxy->timeout.serverfin = MS_TO_TICKS(1000); mworker_proxy->conf.file = strdup("MASTER"); mworker_proxy->conf.line = 0; mworker_proxy->accept = frontend_accept;