The upstream WAF (flights.test.aeroflot.ru) is rate-limiting the corp-
VPN exit IP that pve-201's tunnel uses, returning HTML block-pages or
403s for /api/* requests. Every recent ci-deploy run either died in the
pre-warm step or failed because cached HTML poisoned the SSR. We've sunk
a chunk of time on WAF mitigations (browser UA, cache-bypass,
proxy_no_cache, body validation) and the WAF still wins. Fixing the WAF
has to happen on the customer side.
Until that's resolved, the e2e suite is dead weight in CI — every run
fails for upstream-only reasons. Pull it from ci-deploy entirely:
* Removed: the tunnel-reachability diagnostic, /api pre-warm, Playwright
  install, Playwright run, the e2e branch in the rollback condition,
  and the playwright-report artifact path.
* Kept: build, deploy, swap, wait-for-health (against the SSR root,
  which is local nginx → docker, no upstream involved).
release-verify already had its e2e block removed (commit 36bb2d9);
release.yml comment touched up to match.
Specs and playwright.config.ts stay in the tree — they're still useful
for local runs (`pnpm test:e2e`) once we're back on a network position
the WAF tolerates.
This commit is contained in:
@@ -102,117 +102,8 @@ jobs:
|
|||||||
BASIC_AUTH_PASS: ${{ secrets.BASIC_AUTH_PASS }}
|
BASIC_AUTH_PASS: ${{ secrets.BASIC_AUTH_PASS }}
|
||||||
run: scripts/ci/wait-for-url.sh https://ui-dashboard.gnerim.ru/ 30 2
|
run: scripts/ci/wait-for-url.sh https://ui-dashboard.gnerim.ru/ 30 2
|
||||||
|
|
||||||
- name: Diagnose tunnel reachability
|
|
||||||
id: tunnel_check
|
|
||||||
env:
|
|
||||||
BASIC_AUTH_USER: ${{ secrets.BASIC_AUTH_USER }}
|
|
||||||
BASIC_AUTH_PASS: ${{ secrets.BASIC_AUTH_PASS }}
|
|
||||||
# The upstream WAF blocks the default curl UA — every probe needs
|
|
||||||
# a browser-like User-Agent or it gets the HTML block page.
|
|
||||||
run: |
|
|
||||||
UA='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36'
|
|
||||||
echo "--- /api/health (expect 200 + x-envoy-upstream-service-time + x-cache-status) ---"
|
|
||||||
curl -k -sSI -A "$UA" -u "$BASIC_AUTH_USER:$BASIC_AUTH_PASS" https://ui-dashboard.gnerim.ru/api/health | head -15
|
|
||||||
echo "--- /api/dictionary/1/world_regions (expect JSON, ~5KB) ---"
|
|
||||||
curl -k -sS -A "$UA" -u "$BASIC_AUTH_USER:$BASIC_AUTH_PASS" \
|
|
||||||
-w "\n[size=%{size_download} time=%{time_total}s code=%{http_code}]\n" \
|
|
||||||
https://ui-dashboard.gnerim.ru/api/dictionary/1/world_regions | head -c 400; echo
|
|
||||||
echo "--- second hit on the same dict (expect HIT) ---"
|
|
||||||
curl -k -sSI -A "$UA" -u "$BASIC_AUTH_USER:$BASIC_AUTH_PASS" \
|
|
||||||
https://ui-dashboard.gnerim.ru/api/dictionary/1/world_regions | grep -iE "^HTTP|x-cache|x-envoy"
|
|
||||||
|
|
||||||
- name: Pre-warm /api cache (dictionaries shared across e2e specs)
|
|
||||||
id: cache_warmup
|
|
||||||
env:
|
|
||||||
BASIC_AUTH_USER: ${{ secrets.BASIC_AUTH_USER }}
|
|
||||||
BASIC_AUTH_PASS: ${{ secrets.BASIC_AUTH_PASS }}
|
|
||||||
# Two-step warm with body validation. Run 544 was bitten by cache
|
|
||||||
# poisoning: the upstream WAF returned its HTML block-page with
|
|
||||||
# HTTP 200, the previous prewarm only checked %{http_code}, so
|
|
||||||
# nginx happily cached the HTML as a valid 200 for 6h and every
|
|
||||||
# subsequent dictionary read returned HTML instead of JSON.
|
|
||||||
#
|
|
||||||
# Step 1: validate upstream via cache-bust query (`?_=<ts>` lands
|
|
||||||
# on a unique nginx cache key, forcing an upstream fetch).
|
|
||||||
# Step 2: warm + validate the canonical URL. If the canonical
|
|
||||||
# response is HTML, attempt one cache-bypass retry
|
|
||||||
# (`Cache-Control: no-cache` — works after the matching
|
|
||||||
# nginx config update). If still HTML, fail loudly with a
|
|
||||||
# purge instruction so the operator can clear cache.
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
# The upstream WAF blocks the default curl UA — every fetch must
|
|
||||||
# send a browser-like User-Agent or it returns the HTML block page.
|
|
||||||
UA='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36'
|
|
||||||
is_json() {
|
|
||||||
local body="$1"
|
|
||||||
local first_byte=${body:0:1}
|
|
||||||
[ "$first_byte" = "[" ] || [ "$first_byte" = "{" ] || return 1
|
|
||||||
[ ${#body} -gt 1024 ] || return 1
|
|
||||||
}
|
|
||||||
fail_with_body() {
|
|
||||||
local label="$1" body="$2"
|
|
||||||
echo "::error::pre-warm failed: $label" >&2
|
|
||||||
echo "first 200 bytes of body:" >&2
|
|
||||||
printf '%s\n' "${body:0:200}" >&2
|
|
||||||
exit 1
|
|
||||||
}
|
|
||||||
|
|
||||||
for path in world_regions countries cities airports; do
|
|
||||||
base="https://ui-dashboard.gnerim.ru/api/dictionary/1/${path}"
|
|
||||||
|
|
||||||
# Step 1: prove upstream is healthy (cache-bust via query).
|
|
||||||
bust_url="${base}?_=$(date +%s%N)"
|
|
||||||
bust_body=$(curl -k -sS -A "$UA" -u "$BASIC_AUTH_USER:$BASIC_AUTH_PASS" \
|
|
||||||
--max-time 15 "$bust_url")
|
|
||||||
if ! is_json "$bust_body"; then
|
|
||||||
fail_with_body "${path} upstream returned non-JSON (WAF rate-limit?)" "$bust_body"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Step 2: warm + validate canonical URL.
|
|
||||||
cano_body=$(curl -k -sS -A "$UA" -u "$BASIC_AUTH_USER:$BASIC_AUTH_PASS" \
|
|
||||||
--max-time 15 "$base")
|
|
||||||
if ! is_json "$cano_body"; then
|
|
||||||
# Canonical hit poisoned cache. Force-refresh once via
|
|
||||||
# `Cache-Control: no-cache` (proxy_cache_bypass on the
|
|
||||||
# /api/dictionary/ location forwards to upstream, then
|
|
||||||
# stores the fresh response).
|
|
||||||
cano_body=$(curl -k -sS -A "$UA" -u "$BASIC_AUTH_USER:$BASIC_AUTH_PASS" \
|
|
||||||
-H "Cache-Control: no-cache" \
|
|
||||||
--max-time 15 "$base")
|
|
||||||
if ! is_json "$cano_body"; then
|
|
||||||
echo "::error::cache poisoned for ${path} — Cache-Control: no-cache did not refresh" >&2
|
|
||||||
echo "::error::manual purge: ssh pve-201 'rm -rf /var/cache/nginx/flights-api/* && systemctl reload nginx'" >&2
|
|
||||||
fail_with_body "${path} canonical URL still non-JSON after bypass" "$cano_body"
|
|
||||||
fi
|
|
||||||
echo "warm $path -> ok via cache-bypass (cache had been poisoned, now refreshed; ${#cano_body} bytes)"
|
|
||||||
else
|
|
||||||
echo "warm $path -> ok (${#cano_body} bytes)"
|
|
||||||
fi
|
|
||||||
sleep 2
|
|
||||||
done
|
|
||||||
echo "--- verify cache HIT on a re-fetch ---"
|
|
||||||
curl -k -sSI -A "$UA" -u "$BASIC_AUTH_USER:$BASIC_AUTH_PASS" \
|
|
||||||
https://ui-dashboard.gnerim.ru/api/dictionary/1/cities \
|
|
||||||
| grep -iE "^HTTP|x-cache-status"
|
|
||||||
|
|
||||||
- name: Install Playwright browsers
|
|
||||||
id: playwright_install
|
|
||||||
run: pnpm exec playwright install --with-deps chromium
|
|
||||||
|
|
||||||
- name: Run Playwright e2e
|
|
||||||
id: e2e
|
|
||||||
env:
|
|
||||||
BASE_URL: https://ui-dashboard.gnerim.ru
|
|
||||||
BASIC_AUTH_USER: ${{ secrets.BASIC_AUTH_USER }}
|
|
||||||
BASIC_AUTH_PASS: ${{ secrets.BASIC_AUTH_PASS }}
|
|
||||||
# Skip Angular↔React parity gaps + UI-behavior mismatches that
|
|
||||||
# need separate triage. release-verify runs the full suite.
|
|
||||||
CI_DEPLOY: '1'
|
|
||||||
run: pnpm test:e2e
|
|
||||||
|
|
||||||
- name: Rollback on failure (post-deploy steps)
|
- name: Rollback on failure (post-deploy steps)
|
||||||
if: failure() && (steps.swap.outcome == 'failure' || steps.health.outcome == 'failure' || steps.e2e.outcome == 'failure')
|
if: failure() && (steps.swap.outcome == 'failure' || steps.health.outcome == 'failure')
|
||||||
id: rollback
|
id: rollback
|
||||||
run: scripts/ci/deploy-container.sh rollback
|
run: scripts/ci/deploy-container.sh rollback
|
||||||
|
|
||||||
@@ -225,9 +116,7 @@ jobs:
|
|||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
name: ci-deploy-failure-${{ github.run_id }}
|
name: ci-deploy-failure-${{ github.run_id }}
|
||||||
path: |
|
path: container.log
|
||||||
container.log
|
|
||||||
playwright-report/
|
|
||||||
retention-days: 7
|
retention-days: 7
|
||||||
|
|
||||||
- name: Prune old images
|
- name: Prune old images
|
||||||
|
|||||||
@@ -8,7 +8,8 @@ on:
|
|||||||
|
|
||||||
# Workflow B: sync to GitLab + open MR + auto-merge.
|
# Workflow B: sync to GitLab + open MR + auto-merge.
|
||||||
# Stops at "MR merged" — Jenkins is triggered manually by the operator.
|
# Stops at "MR merged" — Jenkins is triggered manually by the operator.
|
||||||
# After Jenkins finishes, run the `release-verify` workflow to e2e the customer URL.
|
# After Jenkins finishes, run the `release-verify` workflow to smoke-check
|
||||||
|
# the customer URL.
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
release:
|
release:
|
||||||
|
|||||||
Reference in New Issue
Block a user