CI/CD pipeline: ssh -L tunnel for TIM API + manual Jenkins trigger

Two design pivots discovered during Phase B prerequisites:

Routing: Replace static-route + NAT plan with persistent ssh -L tunnel
from pve-201 to webzavod (deployment/systemd/flights-tim-tunnel.service).
nginx proxies /api/ and /map/api/ to https://127.0.0.1:8443 with SNI/Host
overrides so cert validation still targets the real hostname. No webzavod
kernel changes (no ip_forward/MASQUERADE), no /etc/hosts pin needed.

Workflow B: Drop Jenkins trigger/poll automation (operator lacks Jenkins
job-configure access and user API token access). release.yml now stops
after MR merge with a Telegram message containing the Jenkins job URL.
release-verify.yml (new, workflow_dispatch only) runs the customer-URL
e2e suite once the operator has triggered Jenkins manually and it has
completed.

Other:
- SSR loopback port 8081 -> 3002 (8081 was taken by openwebui on pve-201)
- notify-telegram.sh skips cleanly when TG secrets unset (was: hard-fail)
- README + spec addendum cover the new prereqs and removed steps
This commit is contained in:
2026-04-27 11:58:39 +03:00
parent bceca6ad57
commit 03eeddfbf8
14 changed files with 253 additions and 323 deletions
+2 -2
View File
@@ -16,7 +16,7 @@ jobs:
BASIC_AUTH_PASS: ${{ secrets.BASIC_AUTH_PASS }}
TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
FLIGHTS_WEB_PORT: '8081'
FLIGHTS_WEB_PORT: '3002'
steps:
- name: Checkout
@@ -90,7 +90,7 @@ jobs:
- name: Run Playwright e2e
id: e2e
env:
BASE_URL: http://127.0.0.1:8081
BASE_URL: http://127.0.0.1:3002
run: pnpm test:e2e
- name: Rollback on failure (post-deploy steps)
+60
View File
@@ -0,0 +1,60 @@
name: release-verify

# Workflow C — customer-side verification, dispatched by hand only.
#
# Run this after the operator has triggered the Jenkins build manually and that
# build has completed. The job waits for the customer URL to respond
# (scripts/ci/wait-for-url.sh), then runs the Playwright e2e suite against
# http://flights-ui.devwebzavod.ru with the console-error gate.
#
# NOTE(review): the wait step receives only a URL and two numbers, so it
# presumably checks that the URL responds, not that the deployed build is new —
# confirm against wait-for-url.sh. Dispatching before Jenkins has finished may
# therefore exercise the previous build.

on:
  workflow_dispatch:

jobs:
  verify:
    runs-on: pve-201
    timeout-minutes: 30
    env:
      # Job-level so the step `if:` guards below can read the token through the
      # `env` context.
      TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
      TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Telegram is optional: every notify step is skipped when no bot token is
      # configured.
      - name: Notify start
        if: env.TELEGRAM_BOT_TOKEN != ''
        run: scripts/ci/notify-telegram.sh start release-verify

      - name: Setup Node
        uses: actions/setup-node@v4
        with:
          node-version-file: '.nvmrc'

      - name: Setup pnpm
        uses: pnpm/action-setup@v4

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Wait for customer URL
        id: wait_customer
        run: scripts/ci/wait-for-url.sh http://flights-ui.devwebzavod.ru/ru-ru/onlineboard 60 5

      - name: Run Playwright e2e against customer URL
        id: e2e_customer
        env:
          BASE_URL: http://flights-ui.devwebzavod.ru
        run: pnpm test:e2e

      # Runs for any failed step above, so the report directory may not exist
      # yet if the failure happened before Playwright started.
      - name: Upload artifacts on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: release-verify-failure-${{ github.run_id }}
          path: |
            playwright-report/
          retention-days: 7

      - name: Notify (success)
        if: success() && env.TELEGRAM_BOT_TOKEN != ''
        run: scripts/ci/notify-telegram.sh ok release-verify "customer URL e2e green"

      - name: Notify (failure)
        if: failure() && env.TELEGRAM_BOT_TOKEN != ''
        run: scripts/ci/notify-telegram.sh fail release-verify "see Gitea run for Playwright report"
+11 -39
View File
@@ -6,20 +6,20 @@ on:
tags:
- 'release-*'
# Workflow B: sync to GitLab + open MR + auto-merge.
# Stops at "MR merged" — Jenkins is triggered manually by the operator.
# After Jenkins finishes, run the `release-verify` workflow to e2e the customer URL.
jobs:
release:
runs-on: pve-201
timeout-minutes: 60
timeout-minutes: 30
env:
GITLAB_PAT: ${{ secrets.GITLAB_PAT }}
GITLAB_PROJECT_ID: ${{ secrets.GITLAB_PROJECT_ID }}
GITLAB_HOST: 'https://teamscore.gitlab.yandexcloud.net'
GITLAB_PROJECT_PATH: 'aeroflot2/flights-front'
JENKINS_BASE_URL: 'http://jenkins.yc.devwebzavod.ru:8080'
JENKINS_JOB_PATH: '/job/Aeroflot2/job/Flights-Front-Dev'
JENKINS_USER: ${{ secrets.JENKINS_USER }}
JENKINS_API_TOKEN: ${{ secrets.JENKINS_API_TOKEN }}
JENKINS_TRIGGER_TOKEN: ${{ secrets.JENKINS_TRIGGER_TOKEN }}
JENKINS_JOB_URL: 'http://jenkins.yc.devwebzavod.ru:8080/job/Aeroflot2/job/Flights-Front-Dev/'
TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
@@ -37,8 +37,6 @@ jobs:
id: gate
run: |
API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=${GITHUB_SHA}"
# Gitea Actions API is similar to GitHub's; this query may differ slightly per Gitea version.
# If the endpoint isn't available, fall back to a last-3-runs check via the workflows endpoint.
resp=$(curl -fsS -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" "$API" || echo '{"workflow_runs":[]}')
ok=$(echo "$resp" | jq -r --arg name "ci-deploy" '
.workflow_runs[]
@@ -70,8 +68,6 @@ jobs:
- name: Clone GitLab target
id: clone
env:
GITLAB_PAT: ${{ secrets.GITLAB_PAT }}
run: |
rm -rf /tmp/flights-front
git clone "https://oauth2:${GITLAB_PAT}@teamscore.gitlab.yandexcloud.net/aeroflot2/flights-front.git" /tmp/flights-front
@@ -145,7 +141,7 @@ jobs:
"${GITLAB_HOST}/api/v4/projects/${GITLAB_PROJECT_ID}/merge_requests/${{ steps.mr_open.outputs.iid }}/merge" \
>/dev/null
- name: Cleanup MR + branch on failure (B:9-11 only)
- name: Cleanup MR + branch on failure
if: failure() && (steps.mr_open.outcome == 'failure' || steps.mr_approve.outcome == 'failure' || steps.mr_merge.outcome == 'failure')
run: |
IID="${{ steps.mr_open.outputs.iid }}"
@@ -165,35 +161,11 @@ jobs:
>/dev/null || true
fi
- name: Trigger + wait for Jenkins
id: jenkins
if: steps.commit.outputs.skip_remaining != '1'
run: scripts/ci/jenkins-trigger-and-wait.sh
- name: Wait for customer URL to update
id: wait_customer
if: steps.commit.outputs.skip_remaining != '1'
run: scripts/ci/wait-for-url.sh http://flights-ui.devwebzavod.ru/ru-ru/onlineboard 60 5
- name: Run Playwright e2e against customer URL
id: e2e_customer
if: steps.commit.outputs.skip_remaining != '1'
env:
BASE_URL: http://flights-ui.devwebzavod.ru
run: pnpm test:e2e
- name: Upload artifacts on failure
if: failure()
uses: actions/upload-artifact@v4
with:
name: release-failure-${{ github.run_id }}
path: |
playwright-report/
retention-days: 7
- name: Notify (success)
- name: Notify (success — manual Jenkins trigger required)
if: success() && env.TELEGRAM_BOT_TOKEN != ''
run: scripts/ci/notify-telegram.sh ok release "MR ${{ steps.mr_open.outputs.url }}"
run: |
MR_URL='${{ steps.mr_open.outputs.url }}'
scripts/ci/notify-telegram.sh ok release "MR merged: ${MR_URL}. Now trigger Jenkins manually: ${JENKINS_JOB_URL}, then dispatch the release-verify workflow."
- name: Notify (failure)
if: failure() && env.TELEGRAM_BOT_TOKEN != ''
+59 -79
View File
@@ -4,62 +4,46 @@ This is the bootstrap procedure for hosting `https://ui-dashboard.gnerim.ru/` on
## One-time setup
### 1. Routing pve-201 → TIM API (via webzavod)
### 1. SSH tunnel pve-201 → webzavod (TIM API access)
**On webzavod (192.168.88.58)** — verify IP forwarding and MASQUERADE:
The customer WAF on `flights.test.aeroflot.ru` only accepts requests from corp-VPN egress IPs. nginx proxies `/api/` and `/map/api/` to `https://127.0.0.1:8443`, which is forwarded over SSH to webzavod (which terminates the corp VPN on `ppp0`). A systemd unit keeps the tunnel up.
```bash
sysctl net.ipv4.ip_forward # expect: 1
sudo iptables -t nat -L POSTROUTING -nv | grep ppp0 # expect: MASQUERADE rule
**On webzavod (192.168.88.58)** — append the pve-201 pubkey to `~gnezim/.ssh/authorized_keys` with `permitopen` restricting it to one host:port (one-time, read pve-201's `~gnezim/.ssh/id_rsa.pub` first):
```
command="exit 1",no-pty,no-X11-forwarding,no-agent-forwarding,no-user-rc,permitopen="flights.test.aeroflot.ru:443" ssh-rsa AAAA…== pve-201-flights-tim-tunnel
```
If missing:
**On pve-201** — install + enable the systemd unit:
```bash
echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.conf
sudo sysctl -p
sudo iptables -t nat -A POSTROUTING -o ppp0 -j MASQUERADE
sudo apt install iptables-persistent
sudo netfilter-persistent save
```
**On pve-201** — add a persistent static route to TIM via webzavod:
```yaml
# /etc/netplan/01-routes.yaml — adjust NIC name as needed
network:
version: 2
ethernets:
<nic-name>: # replace with actual NIC name from `ip link show`
routes:
- to: 172.18.0.0/16
via: 192.168.88.58
```
```bash
sudo netplan apply
```
**On pve-201** — pin TIM hostnames to reachable A records (TIM DNS returns duplicate As, one of which is dead):
```bash
echo '172.18.0.121 flights.test.aeroflot.ru' | sudo tee -a /etc/hosts
cd /path/to/Aeroflot.Flights.Web
sudo cp deployment/systemd/flights-tim-tunnel.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable --now flights-tim-tunnel.service
sudo systemctl status flights-tim-tunnel.service --no-pager
```
**Smoke test:**
```bash
curl -v https://flights.test.aeroflot.ru/swagger/ # expect: 401 in <300ms
ss -ltn | grep ':8443\b' # expect: a 127.0.0.1:8443 LISTEN line
curl -k --resolve flights.test.aeroflot.ru:8443:127.0.0.1 \
-o /dev/null -w 'swagger: %{http_code}\n' \
https://flights.test.aeroflot.ru:8443/swagger/index.html # expect 401
curl -k --resolve flights.test.aeroflot.ru:8443:127.0.0.1 \
-o /dev/null -w 'api/health: %{http_code}\n' \
https://flights.test.aeroflot.ru:8443/api/health # expect 200
```
If this fails, fix routing/DNS before proceeding — nothing else will work.
If swagger returns 200 with HTML body instead of 401, the tunnel is bypassed and the request egressed directly — fix the listener / SSH unit before proceeding.
### 2. nginx vhost
```bash
cd /path/to/Aeroflot.Flights.Web # repo root, e.g. ~/repos/Aeroflot.Flights.Web
cd /path/to/Aeroflot.Flights.Web
sudo cp deployment/nginx/ui-dashboard.gnerim.ru.conf /etc/nginx/sites-available/
sudo ln -s /etc/nginx/sites-available/ui-dashboard.gnerim.ru.conf /etc/nginx/sites-enabled/
sudo ln -sf /etc/nginx/sites-available/ui-dashboard.gnerim.ru.conf /etc/nginx/sites-enabled/
sudo mkdir -p /etc/nginx/htpasswd
sudo nginx -t
sudo systemctl reload nginx
@@ -81,19 +65,19 @@ Reachability checks the runner must pass:
```bash
curl -fsS https://git.gnerim.ru/ # Gitea
curl -fsSI https://teamscore.gitlab.yandexcloud.net/ # GitLab
curl -fsSI http://jenkins.yc.devwebzavod.ru:8080/ # Jenkins (via static route)
curl -fsSI http://flights-ui.devwebzavod.ru/ # Customer URL (via static route)
```
The customer Jenkins URL and the customer site (`flights-ui.devwebzavod.ru`) are NOT reachable from the runner directly — Workflow B does not call them. Customer-side e2e (Workflow C, `release-verify`) only runs after the operator has manually triggered the Jenkins build, and it reaches the customer URL the same way the upstream API is reached: direct egress where possible, or through additional tunnels added on demand.
### 4. GitLab Personal Access Token
GitLab → User Settings → Access Tokens → create with scopes `api` and `write_repository`. Store as Gitea Actions secret `GITLAB_PAT`.
### 5. Allow self-approve on GitLab project
GitLab → flights-front project → Settings → Merge requests → Approval rules → uncheck **"Prevent approval by author"**.
GitLab → flights-front project → Settings → Merge requests → Approval rules → uncheck **"Prevent approval by author"** (skip if you can already approve your own MRs in the GitLab UI).
Verify by running (locally, after PAT is in place — script is created in Task 17 of the plan):
Verify by running (locally, after PAT is in place):
```bash
GITLAB_PAT=<pat> ./scripts/ci/check-gitlab-project.sh
@@ -101,29 +85,26 @@ GITLAB_PAT=<pat> ./scripts/ci/check-gitlab-project.sh
It prints the numeric project ID (store as `GITLAB_PROJECT_ID` secret) and confirms self-approve is allowed.
### 6. Jenkins remote trigger token
Jenkins → `Aeroflot2/Flights-Front-Dev` job → Configure → check **"Trigger builds remotely"** → set token (e.g. `flights-cd-trigger`). Store as `JENKINS_TRIGGER_TOKEN`.
Also: Jenkins → User → Configure → API Token → Add new token. Store username as `JENKINS_USER`, token as `JENKINS_API_TOKEN`.
### 7. Telegram bot
### 6. Telegram bot (optional)
Use existing bot or create via @BotFather. Get the chat_id by sending a message and querying `https://api.telegram.org/bot<TOKEN>/getUpdates`. Store as `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID`.
### 8. Gitea Actions secrets summary
If either secret is unset, all `notify-telegram.sh` calls in the workflows skip cleanly with no error — the pipeline runs end-to-end without Telegram configured.
### 7. Gitea Actions secrets summary
Repo → Settings → Actions → Secrets — set the following (entries marked optional may be left unset):
| Secret | Purpose |
|---|---|
| `BASIC_AUTH_USER`, `BASIC_AUTH_PASS` | nginx htpasswd |
| `MAP_TILE_URL` | Default `/map/api/tile/{z}/{x}/{y}.jpeg` |
| `API_BASE_URL` | Default `/api` |
| `GITLAB_PAT`, `GITLAB_PROJECT_ID` | GitLab MR API |
| `JENKINS_USER`, `JENKINS_API_TOKEN`, `JENKINS_TRIGGER_TOKEN` | Jenkins API |
| `TELEGRAM_BOT_TOKEN`, `TELEGRAM_CHAT_ID` | Notifications |
| `GITHUB_TOKEN` | Auto-provided by Gitea Actions — no manual setup required |
| Secret | Required | Purpose |
|---|---|---|
| `BASIC_AUTH_USER`, `BASIC_AUTH_PASS` | yes | nginx htpasswd for `ui-dashboard.gnerim.ru` |
| `MAP_TILE_URL` | optional | Default `/map/api/tile/{z}/{x}/{y}.jpeg` |
| `API_BASE_URL` | optional | Default `/api` |
| `GITLAB_PAT`, `GITLAB_PROJECT_ID` | yes (release only) | GitLab MR API |
| `TELEGRAM_BOT_TOKEN`, `TELEGRAM_CHAT_ID` | optional | Notifications |
| `GITHUB_TOKEN` | auto | Provided by Gitea Actions — no manual setup required |
Jenkins is triggered manually after the release workflow merges to GitLab; no Jenkins secret is required.
## Verifying failure paths
@@ -148,7 +129,7 @@ Then push a commit that fails e2e. Rollback step finds no `:previous` and bails.
- Telegram message: `🔥 ci-deploy ROLLBACK FAILED — site is DOWN`
- `https://ui-dashboard.gnerim.ru/` returns 502.
- Manual recovery: `ssh pve-201 'docker stop flights-web 2>/dev/null; docker rm flights-web 2>/dev/null; docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:8081:8080 flights-web:<known-good-sha>'`.
- Manual recovery: `ssh pve-201 'docker stop flights-web 2>/dev/null; docker rm flights-web 2>/dev/null; docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:3002:8080 flights-web:<known-good-sha>'`.
### B: blocked on A not green
@@ -157,27 +138,15 @@ Trigger Workflow B (manual or tag) for a SHA that has no green Workflow A run. V
- Telegram message: `⚠️ release blocked — workflow ci-deploy is not green for <sha>`
- B exits early; nothing changes in GitLab.
### B: Jenkins poll timeout
Temporarily edit `scripts/ci/jenkins-trigger-and-wait.sh` to change the default:
```bash
TIMEOUT="${JENKINS_TIMEOUT:-30}" # was 1800
```
Push to a throwaway branch, trigger Workflow B from that branch via the Gitea UI, and confirm:
- Telegram message: `❌ release FAILED at Jenkins build` (because polling gives up after 30s)
- The Jenkins job itself may continue running — that's fine, it's outside our control.
**Restore the original 1800 default** and force-delete the throwaway branch when done.
## Manual recovery scenarios
### Workflow B failed at step 12-13 (Jenkins) — MR merged but customer site stale
### Workflow B succeeded but Jenkins build failed
GitLab is already at the new commit; Jenkins didn't deploy. Recovery:
GitLab is at the new commit; customer site is stale. Recovery:
1. Open Jenkins UI → click "Build Now" on the same job, or
2. Push a new commit to GitLab to re-trigger Jenkins polling (if it's set up that way), or
3. Re-run Workflow B from a green Workflow A — but only if you also pushed new code; otherwise B will sync a no-op and skip.
1. Open Jenkins UI → check the failing build's console log
2. Fix the issue (in this repo if it's our bug, in customer's infra otherwise)
3. Push fix → Workflow A → Workflow B → trigger Jenkins again
### Container running but nginx returns 502
@@ -186,7 +155,7 @@ Check the bind:
```bash
ssh pve-201
docker ps --filter name=flights-web
curl -v http://127.0.0.1:8081/ # should return 200 (or whatever the SSR root returns)
curl -v http://127.0.0.1:3002/ # should return 200 (or whatever the SSR root returns)
sudo nginx -t && sudo systemctl reload nginx
```
@@ -195,5 +164,16 @@ If the container died, the Restart policy `unless-stopped` should bring it back.
```bash
docker logs flights-web --tail 200
docker stop flights-web 2>/dev/null; docker rm flights-web 2>/dev/null
docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:8081:8080 flights-web:current
docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:3002:8080 flights-web:current
```
### TIM tunnel is down (502 on /api/* but / works)
```bash
sudo systemctl status flights-tim-tunnel.service --no-pager
sudo journalctl -u flights-tim-tunnel.service -n 50 --no-pager
sudo systemctl restart flights-tim-tunnel.service
ss -ltn | grep ':8443\b' # confirm listener is back
```
If the tunnel won't come up, verify SSH key is still authorised on webzavod and that webzavod's `ppp0` is up (`ssh webzavod 'ip -br addr show ppp0'`).
+11 -6
View File
@@ -18,9 +18,9 @@ server {
auth_basic "ui-dashboard";
auth_basic_user_file /etc/nginx/htpasswd/ui-dashboard;
# SSR app on loopback (container bound to 127.0.0.1:8081)
# SSR app on loopback (container bound to 127.0.0.1:3002)
location / {
proxy_pass http://127.0.0.1:8081;
proxy_pass http://127.0.0.1:3002;
proxy_set_header Host $host;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_set_header X-Real-IP $remote_addr;
@@ -32,21 +32,26 @@ server {
}
# API proxy — bypass basic auth (gates HTML, not API).
# Static route on the host sends 172.18.0.0/16 via 192.168.88.58 (webzavod).
# /etc/hosts pins flights.test.aeroflot.ru → 172.18.0.121.
# Routed via the flights-tim-tunnel.service systemd unit (see
# deployment/systemd/flights-tim-tunnel.service): 127.0.0.1:8443 is an
# ssh -L tunnel to webzavod which exits via ppp0 with a corp-VPN source IP
# the upstream WAF whitelists. SNI/Host are set explicitly because the
# TCP target is loopback rather than the real hostname.
location /api/ {
auth_basic off;
proxy_pass https://flights.test.aeroflot.ru;
proxy_pass https://127.0.0.1:8443;
proxy_set_header Host flights.test.aeroflot.ru;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_ssl_server_name on;
proxy_ssl_name flights.test.aeroflot.ru;
}
location /map/api/ {
auth_basic off;
proxy_pass https://flights.test.aeroflot.ru;
proxy_pass https://127.0.0.1:8443;
proxy_set_header Host flights.test.aeroflot.ru;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_ssl_server_name on;
proxy_ssl_name flights.test.aeroflot.ru;
}
}
@@ -0,0 +1,44 @@
# flights-tim-tunnel.service
#
# Keeps an SSH local forward open:
#   pve-201 127.0.0.1:8443 -> webzavod (192.168.88.58) -> flights.test.aeroflot.ru:443
#
# Purpose: nginx on pve-201 sends /api/ and /map/api/ to https://127.0.0.1:8443.
# webzavod terminates the corp VPN (ppp0) and its egress IP is whitelisted by the
# customer WAF, so traffic entering through this forward reaches the real
# backend rather than the WAF interstitial.
#
# The matching authorized_keys entry on webzavod limits this key to:
#   command="exit 1",no-pty,no-X11-forwarding,no-agent-forwarding,no-user-rc,
#   permitopen="flights.test.aeroflot.ru:443"
#
# Installation:
#   sudo cp deployment/systemd/flights-tim-tunnel.service /etc/systemd/system/
#   sudo systemctl daemon-reload
#   sudo systemctl enable --now flights-tim-tunnel.service
#
# Verification (expect a 127.0.0.1:8443 listener, then HTTP 401 from swagger):
#   ss -ltn | grep ':8443\b'
#   curl -k --resolve flights.test.aeroflot.ru:8443:127.0.0.1 -o /dev/null -w '%{http_code}\n' https://flights.test.aeroflot.ru:8443/swagger/index.html

[Unit]
Description=SSH tunnel pve-201->webzavod for flights.test.aeroflot.ru:443
Documentation=https://git.gnerim.ru/gnezim/Aeroflot.Flights.Web
After=network-online.target
Wants=network-online.target

[Service]
User=gnezim
Type=simple
# Bring the tunnel back whenever ssh exits, after a 10 second pause.
Restart=always
RestartSec=10
# ssh flags used below:
#   -N                       forward only, run no remote command
#   BatchMode                never prompt; fail instead of waiting for input
#   ExitOnForwardFailure     exit if the local listener cannot be set up, so
#                            Restart= can retry
#   ServerAlive*             probe the server every 30s and give up after 3
#                            unanswered probes
#   StrictHostKeyChecking    accept-new: trust a first-seen host key, refuse a
#                            changed one
ExecStart=/usr/bin/ssh -N \
    -L 127.0.0.1:8443:flights.test.aeroflot.ru:443 \
    -o StrictHostKeyChecking=accept-new \
    -o ServerAliveCountMax=3 \
    -o ServerAliveInterval=30 \
    -o ExitOnForwardFailure=yes \
    -o BatchMode=yes \
    gnezim@192.168.88.58

[Install]
WantedBy=multi-user.target
@@ -485,3 +485,52 @@ When a private registry comes online (eventual `registry.gnerim.ru`), changes:
2. **The 9 untracked `snap-*.yml` files at repo root** look like throwaway parity-snapshot artifacts. Add to `.gitignore` or commit? Verify before flipping pipeline on (prereq #14).
3. **e2e portability to remote `BASE_URL`** — existing specs were written against localhost. Many likely hardcode paths or rely on dev-only state. Layer 2 of testing strategy budgets time for this.
4. **Initial console-allowlist content** — empty starter; will be populated on first runs ("we'll figure it out in future" per design discussion).
---
## Addendum 2026-04-27 — routing change + manual Jenkins trigger
Two design pivots discovered during Phase B prerequisites work:
### Routing: ssh -L tunnel instead of static-route + NAT
Original design: static route on pve-201 pushes `<TIM-CIDR>` via webzavod's LAN IP, webzavod NATs LAN→ppp0, `/etc/hosts` pins `flights.test.aeroflot.ru` to an internal A record.
Discovered:
- `flights.test.aeroflot.ru` resolves to public IPs from both pve-201 and webzavod (no internal A record exists).
- pve-201 reaches the public IP directly with HTTP 200, **but the response is a WAF interstitial** — the customer WAF returns 200/HTML for non-corp egress and 401/JSON-ready for corp egress.
- The same URL from webzavod returns 401 (real backend) — webzavod's `ppp0` egress IP is whitelisted.
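New design: persistent `ssh -L 127.0.0.1:8443:flights.test.aeroflot.ru:443` from pve-201 to webzavod via systemd unit `deployment/systemd/flights-tim-tunnel.service`. nginx proxies `/api/` and `/map/api/` to `https://127.0.0.1:8443` with `Host` and `proxy_ssl_name` overrides so the SNI and `Host` header still carry the real hostname even though the TCP target is loopback. Note that nginx validates the upstream certificate only when `proxy_ssl_verify on` is set (it is off by default); with it enabled, `proxy_ssl_name` is also the name the certificate is checked against.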
Webzavod-side authorisation pinned with `command="exit 1",no-pty,no-X11-forwarding,no-agent-forwarding,no-user-rc,permitopen="flights.test.aeroflot.ru:443"` — the key cannot open a shell, agent-forward, or forward any other host:port.
Trade-offs vs. original:
- ✅ No webzavod kernel changes (no `ip_forward` toggle, no MASQUERADE rule, no iptables-persistent).
- ✅ No `/etc/hosts` pin needed (DNS resolution happens on webzavod, where the real IPs work).
- ✅ Recoverable in seconds (`systemctl restart flights-tim-tunnel`).
- ⚠ Per-host SSH tunnel — adding another upstream means another `-L` line. Currently only one upstream.
- ⚠ Discovered OpenSSH 9.6 quirk: `restrict + permitopen` causes TLS handshake to EOF mid-stream. Using explicit `no-*` options instead of `restrict` works.
### Workflow B: drop Jenkins automation
Original design: Workflow B triggers Jenkins via remote-build token, polls build status via authenticated API, then runs e2e against customer URL.
Constraint: the operator has neither Jenkins job-configure access (so no remote-trigger token can be set) nor access to a Jenkins user API token. Authenticated API triggering and polling are therefore not possible without admin involvement.
New design:
- **Workflow B (`release.yml`)** — sync to GitLab, open MR, auto-approve, auto-merge, **stop**. Telegram notify includes the Jenkins job URL with instructions to trigger by hand.
- **Workflow C (`release-verify.yml`)** — `workflow_dispatch` only. Operator runs manually after Jenkins finishes. Probes customer URL until reachable, runs Playwright e2e against `http://flights-ui.devwebzavod.ru` with the console-error gate, notifies Telegram.
Removed from the repo:
- `scripts/ci/jenkins-trigger-and-wait.sh`
- `tests/ci/test-jenkins-trigger.sh`
- `tests/ci/fixtures/jenkins-{success,failure}-flow.json`
- `JENKINS_USER`, `JENKINS_API_TOKEN`, `JENKINS_TRIGGER_TOKEN` secrets
Trade-off: lose automated end-to-end pipeline. Acceptable because (a) operator already triggers Jenkins manually today, (b) the manual step is a checkpoint where build failures surface clearly, (c) future Jenkins API access can swap C back into B without changing the rest of the design.
### Other small adjustments
- SSR container loopback port changed from `8081` → `3002` (port 8081 already in use on pve-201 by openwebui).
- `notify-telegram.sh` now skips cleanly when Telegram secrets are unset (was: hard-fail). Lets the pipeline run end-to-end without TG configured.
+2 -2
View File
@@ -9,7 +9,7 @@
#
# Env:
# GITHUB_SHA (required for swap)
# FLIGHTS_WEB_PORT (default 8081 — host port that nginx proxies to)
# FLIGHTS_WEB_PORT (default 3002 — host port that nginx proxies to)
# IMAGE_NAME (default flights-web — set this to point at a registry later)
set -euo pipefail
@@ -20,7 +20,7 @@ if [ "${1:-}" = "--dry-run" ]; then
fi
CMD="${1:-}"
PORT="${FLIGHTS_WEB_PORT:-8081}"
PORT="${FLIGHTS_WEB_PORT:-3002}"
IMAGE="${IMAGE_NAME:-flights-web}"
run() {
-124
View File
@@ -1,124 +0,0 @@
#!/usr/bin/env bash
# jenkins-trigger-and-wait.sh — fire a Jenkins job and wait for completion.
#
# Usage:
#   jenkins-trigger-and-wait.sh                            # real mode (env-driven)
#   jenkins-trigger-and-wait.sh --mock-mode <fixture.json> # for tests
#
# Env (real mode):
#   JENKINS_BASE_URL       e.g. http://jenkins.yc.devwebzavod.ru:8080
#   JENKINS_JOB_PATH       e.g. /job/Aeroflot2/job/Flights-Front-Dev
#   JENKINS_USER, JENKINS_API_TOKEN
#   JENKINS_TRIGGER_TOKEN
#   JENKINS_TIMEOUT        seconds (default 1800)
#   JENKINS_POLL_INTERVAL  seconds (default 10)
#
# Exit status:
#   0         build finished with result SUCCESS
#   1         build finished with any other result, a timeout was hit, or the
#             queue/fixture never produced a build
#   2         bad --mock-mode usage or jq missing in mock mode
#   non-zero  a required env var is unset in real mode, or curl/jq failed
#             (set -e aborts the script)
set -euo pipefail

MODE=real
FIXTURE=""
if [ "${1:-}" = "--mock-mode" ]; then
  MODE=mock
  FIXTURE="${2:-}"
  [ -n "$FIXTURE" ] || { echo "usage: $0 --mock-mode <fixture.json>" >&2; exit 2; }
  command -v jq >/dev/null 2>&1 || { echo "fatal: jq required for --mock-mode" >&2; exit 2; }
fi

POLL_INTERVAL="${JENKINS_POLL_INTERVAL:-10}"
TIMEOUT="${JENKINS_TIMEOUT:-1800}"

# Real mode needs the full credential set; fail fast before any network call.
if [ "$MODE" = real ]; then
  : "${JENKINS_BASE_URL:?required}"
  : "${JENKINS_JOB_PATH:?required}"
  : "${JENKINS_USER:?required}"
  : "${JENKINS_API_TOKEN:?required}"
  : "${JENKINS_TRIGGER_TOKEN:?required}"
fi

# ── Mock mode: walk fixture deterministically ─────────────────────────────────
# No sleeping and no network: each recorded poll in the fixture is consumed once,
# in order.
if [ "$MODE" = mock ]; then
  QUEUE_URL=$(jq -r '.trigger_response.headers.Location' "$FIXTURE")
  echo "triggered (mock): queue=$QUEUE_URL"

  # Walk queue polls until we get an executable.
  count=$(jq '.queue_polls | length' "$FIXTURE")
  BUILD_URL=""
  for i in $(seq 0 $((count - 1))); do
    body=$(jq -c ".queue_polls[$i].body" "$FIXTURE")
    exe_url=$(printf '%s' "$body" | jq -r '.executable.url // empty')
    if [ -n "$exe_url" ]; then
      BUILD_URL="$exe_url"
      break
    fi
    echo "queue poll $((i + 1)): not yet"
  done
  [ -n "${BUILD_URL:-}" ] || { echo "fatal: queue never produced executable" >&2; exit 1; }
  echo "build url (mock): $BUILD_URL"

  # Walk build polls until result != null.
  count=$(jq '.build_polls | length' "$FIXTURE")
  for i in $(seq 0 $((count - 1))); do
    body=$(jq -c ".build_polls[$i].body" "$FIXTURE")
    result=$(printf '%s' "$body" | jq -r '.result // empty')
    number=$(printf '%s' "$body" | jq -r '.number')
    if [ -n "$result" ]; then
      if [ "$result" = "SUCCESS" ]; then
        echo "build #${number} SUCCESS"
        exit 0
      else
        echo "build #${number} ${result}" >&2
        exit 1
      fi
    fi
    echo "build poll $((i + 1)): building"
  done
  echo "fatal: build never completed within fixture" >&2
  exit 1
fi

# ── Real mode ─────────────────────────────────────────────────────────────────
TRIGGER_URL="${JENKINS_BASE_URL}${JENKINS_JOB_PATH}/build?token=${JENKINS_TRIGGER_TOKEN}"
# Log the target with the trigger token redacted: the full URL embeds a secret
# and must not end up in CI logs.
echo "triggering: ${JENKINS_BASE_URL}${JENKINS_JOB_PATH}/build?token=***"
# -D - dumps headers; -o /dev/null discards body. We need the Location header.
HEADERS=$(curl -fsS -X POST -u "${JENKINS_USER}:${JENKINS_API_TOKEN}" -D - -o /dev/null "$TRIGGER_URL")
QUEUE_URL=$(printf '%s' "$HEADERS" | grep -i '^Location:' | head -1 | sed 's/^[Ll]ocation:[[:space:]]*//' | tr -d '\r\n')
[ -n "$QUEUE_URL" ] || { echo "fatal: no Location header from Jenkins" >&2; exit 1; }
echo "queue: $QUEUE_URL"

# Poll queue for executable.url. START covers both queue + build phases.
# The URLs below are built as "${QUEUE_URL}api/json", i.e. they rely on the
# Location header ending in a slash.
START=$(date +%s)
BUILD_URL=""
while [ -z "$BUILD_URL" ]; do
  resp=$(curl -fsS -u "${JENKINS_USER}:${JENKINS_API_TOKEN}" "${QUEUE_URL}api/json")
  BUILD_URL=$(printf '%s' "$resp" | jq -r '.executable.url // empty')
  [ -n "$BUILD_URL" ] && break
  now=$(date +%s)
  if [ $((now - START)) -ge "$TIMEOUT" ]; then
    echo "fatal: queue timeout after ${TIMEOUT}s" >&2
    exit 1
  fi
  sleep "$POLL_INTERVAL"
done
echo "build: $BUILD_URL"

# Poll build for result. Timeout window is shared with queue phase (START not reset).
while :; do
  resp=$(curl -fsS -u "${JENKINS_USER}:${JENKINS_API_TOKEN}" "${BUILD_URL}api/json")
  result=$(printf '%s' "$resp" | jq -r '.result // empty')
  number=$(printf '%s' "$resp" | jq -r '.number')
  if [ -n "$result" ]; then
    if [ "$result" = "SUCCESS" ]; then
      echo "build #${number} SUCCESS"
      exit 0
    else
      echo "build #${number} ${result} — see ${BUILD_URL}console" >&2
      exit 1
    fi
  fi
  now=$(date +%s)
  if [ $((now - START)) -ge "$TIMEOUT" ]; then
    echo "fatal: build timeout after ${TIMEOUT}s — see ${BUILD_URL}console" >&2
    exit 1
  fi
  sleep "$POLL_INTERVAL"
done
+4 -2
View File
@@ -28,8 +28,10 @@ esac
[ -n "$STAGE" ] || { echo "usage: $0 [--dry-run] <start|ok|fail> <stage> [<extra-context>]" >&2; exit 2; }
if [ "$DRY_RUN" -eq 0 ]; then
: "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN required}"
: "${TELEGRAM_CHAT_ID:?TELEGRAM_CHAT_ID required}"
if [ -z "${TELEGRAM_BOT_TOKEN:-}" ] || [ -z "${TELEGRAM_CHAT_ID:-}" ]; then
echo "notify-telegram: TELEGRAM_BOT_TOKEN/TELEGRAM_CHAT_ID unset — skipping" >&2
exit 0
fi
fi
REPO="${GITHUB_REPOSITORY:-unknown/repo}"
@@ -1,15 +0,0 @@
{
"trigger_response": {
"status": 201,
"headers": {
"Location": "http://jenkins.test/queue/item/78/"
}
},
"queue_polls": [
{"status": 200, "body": {"executable": {"number": 43, "url": "http://jenkins.test/job/Aeroflot2/job/Flights-Front-Dev/43/"}}}
],
"build_polls": [
{"status": 200, "body": {"building": true, "result": null, "number": 43}},
{"status": 200, "body": {"building": false, "result": "FAILURE", "number": 43}}
]
}
@@ -1,18 +0,0 @@
{
"trigger_response": {
"status": 201,
"headers": {
"Location": "http://jenkins.test/queue/item/77/"
}
},
"queue_polls": [
{"status": 200, "body": {"why": "in queue", "executable": null}},
{"status": 200, "body": {"why": "in queue", "executable": null}},
{"status": 200, "body": {"executable": {"number": 42, "url": "http://jenkins.test/job/Aeroflot2/job/Flights-Front-Dev/42/"}}}
],
"build_polls": [
{"status": 200, "body": {"building": true, "result": null, "number": 42}},
{"status": 200, "body": {"building": true, "result": null, "number": 42}},
{"status": 200, "body": {"building": false, "result": "SUCCESS", "number": 42}}
]
}
-31
View File
@@ -1,31 +0,0 @@
#!/usr/bin/env bash
# test-jenkins-trigger.sh — checks scripts/ci/jenkins-trigger-and-wait.sh.
#
# Covers three cases: the success fixture exits 0 and reports build #42, the
# failure fixture exits non-zero and reports FAILURE, and a bare invocation with
# no Jenkins configuration is rejected. Prints PASS on success; exits 1 with a
# FAIL message otherwise, or exits 0 with SKIP when jq is unavailable.
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
SCRIPT="$ROOT/scripts/ci/jenkins-trigger-and-wait.sh"
[ -x "$SCRIPT" ] || { echo "FAIL: $SCRIPT not executable"; exit 1; }

# Mock-mode tests need jq — bail with a useful message if unavailable.
command -v jq >/dev/null 2>&1 || { echo "SKIP: jq not installed"; exit 0; }

# Per-run log file instead of a fixed /tmp path, so concurrent runs cannot
# clobber each other; removed on exit.
LOG="$(mktemp)"
trap 'rm -f "$LOG"' EXIT

# --- success path ---
# pipefail (set above) makes the pipeline status reflect the script, not tee.
if ! "$SCRIPT" --mock-mode "$ROOT/tests/ci/fixtures/jenkins-success-flow.json" 2>&1 | tee "$LOG"; then
  echo "FAIL: success fixture should exit 0"
  exit 1
fi
grep -q "build #42 SUCCESS" "$LOG" || { echo "FAIL: expected 'build #42 SUCCESS'"; exit 1; }

# --- failure path ---
if "$SCRIPT" --mock-mode "$ROOT/tests/ci/fixtures/jenkins-failure-flow.json" 2>&1 | tee "$LOG"; then
  echo "FAIL: failure fixture should exit non-zero"
  exit 1
fi
grep -q "FAILURE" "$LOG" || { echo "FAIL: expected 'FAILURE' in output"; exit 1; }

# --- bad usage ---
# A bare invocation runs real mode. Clear the Jenkins variables in a subshell
# first: if they were exported in the caller's environment, this case would
# otherwise trigger an actual Jenkins build instead of failing on missing env.
if (
  unset JENKINS_BASE_URL JENKINS_JOB_PATH JENKINS_USER JENKINS_API_TOKEN JENKINS_TRIGGER_TOKEN
  "$SCRIPT"
) 2>/dev/null; then
  echo "FAIL: expected usage error"
  exit 1
fi

echo "PASS: jenkins-trigger-and-wait.sh"
+9 -3
View File
@@ -37,12 +37,18 @@ out=$("$SCRIPT" --dry-run fail ci-deploy "Run Playwright e2e")
assert_contains "$out" "❌ ci-deploy FAILED"
assert_contains "$out" "Run Playwright e2e"
# --- missing env should error in non-dry-run ---
# --- missing env in non-dry-run: should skip cleanly (exit 0, log to stderr) ---
unset TELEGRAM_BOT_TOKEN
if "$SCRIPT" ok ci-deploy 2>/dev/null; then
echo "FAIL: expected error when TELEGRAM_BOT_TOKEN missing"
set +e
err=$("$SCRIPT" ok ci-deploy 2>&1 >/dev/null)
rc=$?
set -e
if [ $rc -ne 0 ]; then
echo "FAIL: expected exit 0 when TELEGRAM_BOT_TOKEN missing (got $rc)"
exit 1
fi
assert_contains "$err" "skipping"
export TELEGRAM_BOT_TOKEN="test-token"
# --- fail with log tail ---