CI/CD pipeline: ssh -L tunnel for TIM API + manual Jenkins trigger
Two design pivots discovered during Phase B prerequisites: Routing: Replace static-route + NAT plan with persistent ssh -L tunnel from pve-201 to webzavod (deployment/systemd/flights-tim-tunnel.service). nginx proxies /api/ and /map/api/ to https://127.0.0.1:8443 with SNI/Host overrides so cert validation still targets the real hostname. No webzavod kernel changes (no ip_forward/MASQUERADE), no /etc/hosts pin needed. Workflow B: Drop Jenkins trigger/poll automation (operator lacks Jenkins job-configure access and user API token access). release.yml now stops after MR merge with a Telegram message containing the Jenkins job URL. release-verify.yml (new, workflow_dispatch only) runs the customer-URL e2e suite once the operator has triggered Jenkins manually and it has completed. Other: - SSR loopback port 8081 -> 3002 (8081 was taken by openwebui on pve-201) - notify-telegram.sh skips cleanly when TG secrets unset (was: hard-fail) - README + spec addendum cover the new prereqs and removed steps
This commit is contained in:
@@ -16,7 +16,7 @@ jobs:
|
||||
BASIC_AUTH_PASS: ${{ secrets.BASIC_AUTH_PASS }}
|
||||
TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
FLIGHTS_WEB_PORT: '8081'
|
||||
FLIGHTS_WEB_PORT: '3002'
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
- name: Run Playwright e2e
|
||||
id: e2e
|
||||
env:
|
||||
BASE_URL: http://127.0.0.1:8081
|
||||
BASE_URL: http://127.0.0.1:3002
|
||||
run: pnpm test:e2e
|
||||
|
||||
- name: Rollback on failure (post-deploy steps)
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
name: release-verify

# Workflow C — customer-URL verification. Dispatched by hand (workflow_dispatch
# only) after the operator has triggered the Jenkins build manually and that
# build has finished.
#
# Waits for the customer URL to respond, then runs the Playwright e2e suite
# against http://flights-ui.devwebzavod.ru with the console-error gate.
#
# NOTE(review): scripts/ci/wait-for-url.sh is not part of this file. The design
# notes describe the probe as "until reachable", so it is presumably NOT a
# build-freshness check — dispatching this workflow before Jenkins completes
# would then exercise the previous build. Confirm against the script.

on:
  workflow_dispatch:

jobs:
  verify:
    runs-on: pve-201
    timeout-minutes: 30
    env:
      # Telegram credentials are read by scripts/ci/notify-telegram.sh; the
      # notify steps below are skipped when the bot token is empty.
      TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
      TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Notify start
        # Same bare-expression form as the success/failure notify steps.
        if: env.TELEGRAM_BOT_TOKEN != ''
        run: scripts/ci/notify-telegram.sh start release-verify

      - name: Setup Node + pnpm
        uses: actions/setup-node@v4
        with:
          node-version-file: '.nvmrc'
      - uses: pnpm/action-setup@v4

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Wait for customer URL
        id: wait_customer
        # Arguments: URL, then 60 and 5 — presumably attempt count and delay in
        # seconds; TODO confirm against scripts/ci/wait-for-url.sh.
        run: scripts/ci/wait-for-url.sh http://flights-ui.devwebzavod.ru/ru-ru/onlineboard 60 5

      - name: Run Playwright e2e against customer URL
        id: e2e_customer
        env:
          BASE_URL: 'http://flights-ui.devwebzavod.ru'
        run: pnpm test:e2e

      - name: Upload artifacts on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: release-verify-failure-${{ github.run_id }}
          path: |
            playwright-report/
          retention-days: 7

      - name: Notify (success)
        if: success() && env.TELEGRAM_BOT_TOKEN != ''
        run: scripts/ci/notify-telegram.sh ok release-verify "customer URL e2e green"

      - name: Notify (failure)
        if: failure() && env.TELEGRAM_BOT_TOKEN != ''
        run: scripts/ci/notify-telegram.sh fail release-verify "see Gitea run for Playwright report"
|
||||
@@ -6,20 +6,20 @@ on:
|
||||
tags:
|
||||
- 'release-*'
|
||||
|
||||
# Workflow B: sync to GitLab + open MR + auto-merge.
|
||||
# Stops at "MR merged" — Jenkins is triggered manually by the operator.
|
||||
# After Jenkins finishes, run the `release-verify` workflow to e2e the customer URL.
|
||||
|
||||
jobs:
|
||||
release:
|
||||
runs-on: pve-201
|
||||
timeout-minutes: 60
|
||||
timeout-minutes: 30
|
||||
env:
|
||||
GITLAB_PAT: ${{ secrets.GITLAB_PAT }}
|
||||
GITLAB_PROJECT_ID: ${{ secrets.GITLAB_PROJECT_ID }}
|
||||
GITLAB_HOST: 'https://teamscore.gitlab.yandexcloud.net'
|
||||
GITLAB_PROJECT_PATH: 'aeroflot2/flights-front'
|
||||
JENKINS_BASE_URL: 'http://jenkins.yc.devwebzavod.ru:8080'
|
||||
JENKINS_JOB_PATH: '/job/Aeroflot2/job/Flights-Front-Dev'
|
||||
JENKINS_USER: ${{ secrets.JENKINS_USER }}
|
||||
JENKINS_API_TOKEN: ${{ secrets.JENKINS_API_TOKEN }}
|
||||
JENKINS_TRIGGER_TOKEN: ${{ secrets.JENKINS_TRIGGER_TOKEN }}
|
||||
JENKINS_JOB_URL: 'http://jenkins.yc.devwebzavod.ru:8080/job/Aeroflot2/job/Flights-Front-Dev/'
|
||||
TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
|
||||
TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
|
||||
|
||||
@@ -37,8 +37,6 @@ jobs:
|
||||
id: gate
|
||||
run: |
|
||||
API="${GITHUB_SERVER_URL}/api/v1/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=${GITHUB_SHA}"
|
||||
# Gitea Actions API is similar to GitHub's; this query may differ slightly per Gitea version.
|
||||
# If the endpoint isn't available, fall back to a last-3-runs check via the workflows endpoint.
|
||||
resp=$(curl -fsS -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" "$API" || echo '{"workflow_runs":[]}')
|
||||
ok=$(echo "$resp" | jq -r --arg name "ci-deploy" '
|
||||
.workflow_runs[]
|
||||
@@ -70,8 +68,6 @@ jobs:
|
||||
|
||||
- name: Clone GitLab target
|
||||
id: clone
|
||||
env:
|
||||
GITLAB_PAT: ${{ secrets.GITLAB_PAT }}
|
||||
run: |
|
||||
rm -rf /tmp/flights-front
|
||||
git clone "https://oauth2:${GITLAB_PAT}@teamscore.gitlab.yandexcloud.net/aeroflot2/flights-front.git" /tmp/flights-front
|
||||
@@ -145,7 +141,7 @@ jobs:
|
||||
"${GITLAB_HOST}/api/v4/projects/${GITLAB_PROJECT_ID}/merge_requests/${{ steps.mr_open.outputs.iid }}/merge" \
|
||||
>/dev/null
|
||||
|
||||
- name: Cleanup MR + branch on failure (B:9-11 only)
|
||||
- name: Cleanup MR + branch on failure
|
||||
if: failure() && (steps.mr_open.outcome == 'failure' || steps.mr_approve.outcome == 'failure' || steps.mr_merge.outcome == 'failure')
|
||||
run: |
|
||||
IID="${{ steps.mr_open.outputs.iid }}"
|
||||
@@ -165,35 +161,11 @@ jobs:
|
||||
>/dev/null || true
|
||||
fi
|
||||
|
||||
- name: Trigger + wait for Jenkins
|
||||
id: jenkins
|
||||
if: steps.commit.outputs.skip_remaining != '1'
|
||||
run: scripts/ci/jenkins-trigger-and-wait.sh
|
||||
|
||||
- name: Wait for customer URL to update
|
||||
id: wait_customer
|
||||
if: steps.commit.outputs.skip_remaining != '1'
|
||||
run: scripts/ci/wait-for-url.sh http://flights-ui.devwebzavod.ru/ru-ru/onlineboard 60 5
|
||||
|
||||
- name: Run Playwright e2e against customer URL
|
||||
id: e2e_customer
|
||||
if: steps.commit.outputs.skip_remaining != '1'
|
||||
env:
|
||||
BASE_URL: http://flights-ui.devwebzavod.ru
|
||||
run: pnpm test:e2e
|
||||
|
||||
- name: Upload artifacts on failure
|
||||
if: failure()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: release-failure-${{ github.run_id }}
|
||||
path: |
|
||||
playwright-report/
|
||||
retention-days: 7
|
||||
|
||||
- name: Notify (success)
|
||||
- name: Notify (success — manual Jenkins trigger required)
|
||||
if: success() && env.TELEGRAM_BOT_TOKEN != ''
|
||||
run: scripts/ci/notify-telegram.sh ok release "MR ${{ steps.mr_open.outputs.url }}"
|
||||
run: |
|
||||
MR_URL='${{ steps.mr_open.outputs.url }}'
|
||||
scripts/ci/notify-telegram.sh ok release "MR merged: ${MR_URL}. Now trigger Jenkins manually: ${JENKINS_JOB_URL}, then dispatch the release-verify workflow."
|
||||
|
||||
- name: Notify (failure)
|
||||
if: failure() && env.TELEGRAM_BOT_TOKEN != ''
|
||||
|
||||
+59
-79
@@ -4,62 +4,46 @@ This is the bootstrap procedure for hosting `https://ui-dashboard.gnerim.ru/` on
|
||||
|
||||
## One-time setup
|
||||
|
||||
### 1. Routing pve-201 → TIM API (via webzavod)
|
||||
### 1. SSH tunnel pve-201 → webzavod (TIM API access)
|
||||
|
||||
**On webzavod (192.168.88.58)** — verify IP forwarding and MASQUERADE:
|
||||
The customer WAF on `flights.test.aeroflot.ru` only accepts requests from corp-VPN egress IPs. nginx proxies `/api/` and `/map/api/` to `https://127.0.0.1:8443`, which is forwarded over SSH to webzavod (which terminates the corp VPN on `ppp0`). A systemd unit keeps the tunnel up.
|
||||
|
||||
```bash
|
||||
sysctl net.ipv4.ip_forward # expect: 1
|
||||
sudo iptables -t nat -L POSTROUTING -nv | grep ppp0 # expect: MASQUERADE rule
|
||||
**On webzavod (192.168.88.58)** — append the pve-201 pubkey to `~gnezim/.ssh/authorized_keys` with `permitopen` restricting it to one host:port (one-time, read pve-201's `~gnezim/.ssh/id_rsa.pub` first):
|
||||
|
||||
```
|
||||
command="exit 1",no-pty,no-X11-forwarding,no-agent-forwarding,no-user-rc,permitopen="flights.test.aeroflot.ru:443" ssh-rsa AAAA…== pve-201-flights-tim-tunnel
|
||||
```
|
||||
|
||||
If missing:
|
||||
**On pve-201** — install + enable the systemd unit:
|
||||
|
||||
```bash
|
||||
echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.conf
|
||||
sudo sysctl -p
|
||||
sudo iptables -t nat -A POSTROUTING -o ppp0 -j MASQUERADE
|
||||
sudo apt install iptables-persistent
|
||||
sudo netfilter-persistent save
|
||||
```
|
||||
|
||||
**On pve-201** — add a persistent static route to TIM via webzavod:
|
||||
|
||||
```yaml
|
||||
# /etc/netplan/01-routes.yaml — adjust NIC name as needed
|
||||
network:
|
||||
version: 2
|
||||
ethernets:
|
||||
<nic-name>: # replace with actual NIC name from `ip link show`
|
||||
routes:
|
||||
- to: 172.18.0.0/16
|
||||
via: 192.168.88.58
|
||||
```
|
||||
|
||||
```bash
|
||||
sudo netplan apply
|
||||
```
|
||||
|
||||
**On pve-201** — pin TIM hostnames to reachable A records (TIM DNS returns duplicate As, one of which is dead):
|
||||
|
||||
```bash
|
||||
echo '172.18.0.121 flights.test.aeroflot.ru' | sudo tee -a /etc/hosts
|
||||
cd /path/to/Aeroflot.Flights.Web
|
||||
sudo cp deployment/systemd/flights-tim-tunnel.service /etc/systemd/system/
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable --now flights-tim-tunnel.service
|
||||
sudo systemctl status flights-tim-tunnel.service --no-pager
|
||||
```
|
||||
|
||||
**Smoke test:**
|
||||
|
||||
```bash
|
||||
curl -v https://flights.test.aeroflot.ru/swagger/ # expect: 401 in <300ms
|
||||
ss -ltn | grep ':8443\b' # expect: a 127.0.0.1:8443 LISTEN line
|
||||
curl -k --resolve flights.test.aeroflot.ru:8443:127.0.0.1 \
|
||||
-o /dev/null -w 'swagger: %{http_code}\n' \
|
||||
https://flights.test.aeroflot.ru:8443/swagger/index.html # expect 401
|
||||
curl -k --resolve flights.test.aeroflot.ru:8443:127.0.0.1 \
|
||||
-o /dev/null -w 'api/health: %{http_code}\n' \
|
||||
https://flights.test.aeroflot.ru:8443/api/health # expect 200
|
||||
```
|
||||
|
||||
If this fails, fix routing/DNS before proceeding — nothing else will work.
|
||||
If swagger returns 200 with HTML body instead of 401, the tunnel is bypassed and the request egressed directly — fix the listener / SSH unit before proceeding.
|
||||
|
||||
### 2. nginx vhost
|
||||
|
||||
```bash
|
||||
cd /path/to/Aeroflot.Flights.Web # repo root, e.g. ~/repos/Aeroflot.Flights.Web
|
||||
cd /path/to/Aeroflot.Flights.Web
|
||||
sudo cp deployment/nginx/ui-dashboard.gnerim.ru.conf /etc/nginx/sites-available/
|
||||
sudo ln -s /etc/nginx/sites-available/ui-dashboard.gnerim.ru.conf /etc/nginx/sites-enabled/
|
||||
sudo ln -sf /etc/nginx/sites-available/ui-dashboard.gnerim.ru.conf /etc/nginx/sites-enabled/
|
||||
sudo mkdir -p /etc/nginx/htpasswd
|
||||
sudo nginx -t
|
||||
sudo systemctl reload nginx
|
||||
@@ -81,19 +65,19 @@ Reachability checks the runner must pass:
|
||||
```bash
|
||||
curl -fsS https://git.gnerim.ru/ # Gitea
|
||||
curl -fsSI https://teamscore.gitlab.yandexcloud.net/ # GitLab
|
||||
curl -fsSI http://jenkins.yc.devwebzavod.ru:8080/ # Jenkins (via static route)
|
||||
curl -fsSI http://flights-ui.devwebzavod.ru/ # Customer URL (via static route)
|
||||
```
|
||||
|
||||
The customer Jenkins URL and the customer site (`flights-ui.devwebzavod.ru`) are not reachable from the runner directly, and Workflow B does not call them. Customer-side e2e (Workflow C, `release-verify`) runs only after the operator has manually triggered the Jenkins build and it has finished. Because Workflow C runs on the same runner, the customer URL must be reachable from pve-201 before it is dispatched: by direct egress where that works, otherwise through an additional tunnel added on demand, like the TIM API tunnel from step 1.
|
||||
|
||||
### 4. GitLab Personal Access Token
|
||||
|
||||
GitLab → User Settings → Access Tokens → create with scopes `api` and `write_repository`. Store as Gitea Actions secret `GITLAB_PAT`.
|
||||
|
||||
### 5. Allow self-approve on GitLab project
|
||||
|
||||
GitLab → flights-front project → Settings → Merge requests → Approval rules → uncheck **"Prevent approval by author"**.
|
||||
GitLab → flights-front project → Settings → Merge requests → Approval rules → uncheck **"Prevent approval by author"** (skip if you can already approve your own MRs in the GitLab UI).
|
||||
|
||||
Verify by running (locally, after PAT is in place — script is created in Task 17 of the plan):
|
||||
Verify by running (locally, after PAT is in place):
|
||||
|
||||
```bash
|
||||
GITLAB_PAT=<pat> ./scripts/ci/check-gitlab-project.sh
|
||||
@@ -101,29 +85,26 @@ GITLAB_PAT=<pat> ./scripts/ci/check-gitlab-project.sh
|
||||
|
||||
It prints the numeric project ID (store as `GITLAB_PROJECT_ID` secret) and confirms self-approve is allowed.
|
||||
|
||||
### 6. Jenkins remote trigger token
|
||||
|
||||
Jenkins → `Aeroflot2/Flights-Front-Dev` job → Configure → check **"Trigger builds remotely"** → set token (e.g. `flights-cd-trigger`). Store as `JENKINS_TRIGGER_TOKEN`.
|
||||
|
||||
Also: Jenkins → User → Configure → API Token → Add new token. Store username as `JENKINS_USER`, token as `JENKINS_API_TOKEN`.
|
||||
|
||||
### 7. Telegram bot
|
||||
### 6. Telegram bot (optional)
|
||||
|
||||
Use existing bot or create via @BotFather. Get the chat_id by sending a message and querying `https://api.telegram.org/bot<TOKEN>/getUpdates`. Store as `TELEGRAM_BOT_TOKEN` and `TELEGRAM_CHAT_ID`.
|
||||
|
||||
### 8. Gitea Actions secrets summary
|
||||
If either secret is unset, all `notify-telegram.sh` calls in the workflows skip cleanly with no error — the pipeline runs end-to-end without Telegram configured.
|
||||
|
||||
### 7. Gitea Actions secrets summary
|
||||
|
||||
Repo → Settings → Actions → Secrets — set all of:
|
||||
|
||||
| Secret | Purpose |
|
||||
|---|---|
|
||||
| `BASIC_AUTH_USER`, `BASIC_AUTH_PASS` | nginx htpasswd |
|
||||
| `MAP_TILE_URL` | Default `/map/api/tile/{z}/{x}/{y}.jpeg` |
|
||||
| `API_BASE_URL` | Default `/api` |
|
||||
| `GITLAB_PAT`, `GITLAB_PROJECT_ID` | GitLab MR API |
|
||||
| `JENKINS_USER`, `JENKINS_API_TOKEN`, `JENKINS_TRIGGER_TOKEN` | Jenkins API |
|
||||
| `TELEGRAM_BOT_TOKEN`, `TELEGRAM_CHAT_ID` | Notifications |
|
||||
| `GITHUB_TOKEN` | Auto-provided by Gitea Actions — no manual setup required |
|
||||
| Secret | Required | Purpose |
|
||||
|---|---|---|
|
||||
| `BASIC_AUTH_USER`, `BASIC_AUTH_PASS` | yes | nginx htpasswd for `ui-dashboard.gnerim.ru` |
|
||||
| `MAP_TILE_URL` | optional | Default `/map/api/tile/{z}/{x}/{y}.jpeg` |
|
||||
| `API_BASE_URL` | optional | Default `/api` |
|
||||
| `GITLAB_PAT`, `GITLAB_PROJECT_ID` | yes (release only) | GitLab MR API |
|
||||
| `TELEGRAM_BOT_TOKEN`, `TELEGRAM_CHAT_ID` | optional | Notifications |
|
||||
| `GITHUB_TOKEN` | auto | Provided by Gitea Actions — no manual setup required |
|
||||
|
||||
Jenkins is triggered manually after the release workflow merges to GitLab; no Jenkins secret is required.
|
||||
|
||||
## Verifying failure paths
|
||||
|
||||
@@ -148,7 +129,7 @@ Then push a commit that fails e2e. Rollback step finds no `:previous` and bails.
|
||||
|
||||
- Telegram message: `🔥 ci-deploy ROLLBACK FAILED — site is DOWN`
|
||||
- `https://ui-dashboard.gnerim.ru/` returns 502.
|
||||
- Manual recovery: `ssh pve-201 'docker stop flights-web 2>/dev/null; docker rm flights-web 2>/dev/null; docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:8081:8080 flights-web:<known-good-sha>'`.
|
||||
- Manual recovery: `ssh pve-201 'docker stop flights-web 2>/dev/null; docker rm flights-web 2>/dev/null; docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:3002:8080 flights-web:<known-good-sha>'`.
|
||||
|
||||
### B: blocked on A not green
|
||||
|
||||
@@ -157,27 +138,15 @@ Trigger Workflow B (manual or tag) for a SHA that has no green Workflow A run. V
|
||||
- Telegram message: `⚠️ release blocked — workflow ci-deploy is not green for <sha>`
|
||||
- B exits early; nothing changes in GitLab.
|
||||
|
||||
### B: Jenkins poll timeout
|
||||
|
||||
Temporarily edit `scripts/ci/jenkins-trigger-and-wait.sh` to change the default:
|
||||
```bash
|
||||
TIMEOUT="${JENKINS_TIMEOUT:-30}" # was 1800
|
||||
```
|
||||
Push to a throwaway branch, trigger Workflow B from that branch via the Gitea UI, and confirm:
|
||||
- Telegram message: `❌ release FAILED at Jenkins build` (because polling gives up after 30s)
|
||||
- The Jenkins job itself may continue running — that's fine, it's outside our control.
|
||||
|
||||
**Restore the original 1800 default** and force-delete the throwaway branch when done.
|
||||
|
||||
## Manual recovery scenarios
|
||||
|
||||
### Workflow B failed at step 12-13 (Jenkins) — MR merged but customer site stale
|
||||
### Workflow B succeeded but Jenkins build failed
|
||||
|
||||
GitLab is already at the new commit; Jenkins didn't deploy. Recovery:
|
||||
GitLab is at the new commit; customer site is stale. Recovery:
|
||||
|
||||
1. Open Jenkins UI → click "Build Now" on the same job, or
|
||||
2. Push a new commit to GitLab to re-trigger Jenkins polling (if it's set up that way), or
|
||||
3. Re-run Workflow B from a green Workflow A — but only if you also pushed new code; otherwise B will sync a no-op and skip.
|
||||
1. Open Jenkins UI → check the failing build's console log
|
||||
2. Fix the issue (in this repo if it's our bug, in customer's infra otherwise)
|
||||
3. Push fix → Workflow A → Workflow B → trigger Jenkins again
|
||||
|
||||
### Container running but nginx returns 502
|
||||
|
||||
@@ -186,7 +155,7 @@ Check the bind:
|
||||
```bash
|
||||
ssh pve-201
|
||||
docker ps --filter name=flights-web
|
||||
curl -v http://127.0.0.1:8081/ # should return 200 (or whatever the SSR root returns)
|
||||
curl -v http://127.0.0.1:3002/ # should return 200 (or whatever the SSR root returns)
|
||||
sudo nginx -t && sudo systemctl reload nginx
|
||||
```
|
||||
|
||||
@@ -195,5 +164,16 @@ If the container died, the Restart policy `unless-stopped` should bring it back.
|
||||
```bash
|
||||
docker logs flights-web --tail 200
|
||||
docker stop flights-web 2>/dev/null; docker rm flights-web 2>/dev/null
|
||||
docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:8081:8080 flights-web:current
|
||||
docker run -d --name flights-web --restart unless-stopped -p 127.0.0.1:3002:8080 flights-web:current
|
||||
```
|
||||
|
||||
### TIM tunnel is down (502 on /api/* but / works)
|
||||
|
||||
```bash
|
||||
sudo systemctl status flights-tim-tunnel.service --no-pager
|
||||
sudo journalctl -u flights-tim-tunnel.service -n 50 --no-pager
|
||||
sudo systemctl restart flights-tim-tunnel.service
|
||||
ss -ltn | grep ':8443\b' # confirm listener is back
|
||||
```
|
||||
|
||||
If the tunnel won't come up, verify SSH key is still authorised on webzavod and that webzavod's `ppp0` is up (`ssh webzavod 'ip -br addr show ppp0'`).
|
||||
|
||||
@@ -18,9 +18,9 @@ server {
|
||||
auth_basic "ui-dashboard";
|
||||
auth_basic_user_file /etc/nginx/htpasswd/ui-dashboard;
|
||||
|
||||
# SSR app on loopback (container bound to 127.0.0.1:8081)
|
||||
# SSR app on loopback (container bound to 127.0.0.1:3002)
|
||||
location / {
|
||||
proxy_pass http://127.0.0.1:8081;
|
||||
proxy_pass http://127.0.0.1:3002;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
@@ -32,21 +32,26 @@ server {
|
||||
}
|
||||
|
||||
# API proxy — bypass basic auth (gates HTML, not API).
|
||||
# Static route on the host sends 172.18.0.0/16 via 192.168.88.58 (webzavod).
|
||||
# /etc/hosts pins flights.test.aeroflot.ru → 172.18.0.121.
|
||||
# Routed via the flights-tim-tunnel.service systemd unit (see
|
||||
# deployment/systemd/flights-tim-tunnel.service): 127.0.0.1:8443 is an
|
||||
# ssh -L tunnel to webzavod which exits via ppp0 with a corp-VPN source IP
|
||||
# the upstream WAF whitelists. SNI/Host are set explicitly because the
|
||||
# TCP target is loopback rather than the real hostname.
|
||||
location /api/ {
|
||||
auth_basic off;
|
||||
proxy_pass https://flights.test.aeroflot.ru;
|
||||
proxy_pass https://127.0.0.1:8443;
|
||||
proxy_set_header Host flights.test.aeroflot.ru;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_ssl_server_name on;
|
||||
proxy_ssl_name flights.test.aeroflot.ru;
|
||||
}
|
||||
|
||||
location /map/api/ {
|
||||
auth_basic off;
|
||||
proxy_pass https://flights.test.aeroflot.ru;
|
||||
proxy_pass https://127.0.0.1:8443;
|
||||
proxy_set_header Host flights.test.aeroflot.ru;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_ssl_server_name on;
|
||||
proxy_ssl_name flights.test.aeroflot.ru;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
# flights-tim-tunnel.service
#
# Keeps an SSH local-forward open from pve-201 through webzavod to
# flights.test.aeroflot.ru:443.
#
# Why: nginx on pve-201 sends /api/ and /map/api/ to https://127.0.0.1:8443.
# This unit binds that loopback port and forwards it over SSH to webzavod
# (192.168.88.58), where the corp VPN (ppp0) terminates. The customer WAF
# whitelists webzavod's egress IP, so traffic through the tunnel reaches the
# real backend rather than the WAF interstitial.
#
# The matching authorized_keys entry on webzavod limits this key to:
#   command="exit 1",no-pty,no-X11-forwarding,no-agent-forwarding,no-user-rc,
#   permitopen="flights.test.aeroflot.ru:443"
#
# Installation:
#   sudo cp deployment/systemd/flights-tim-tunnel.service /etc/systemd/system/
#   sudo systemctl daemon-reload
#   sudo systemctl enable --now flights-tim-tunnel.service
#
# Verification:
#   ss -ltn | grep ':8443\b'
#   curl -k --resolve flights.test.aeroflot.ru:8443:127.0.0.1 \
#     -o /dev/null -w '%{http_code}\n' \
#     https://flights.test.aeroflot.ru:8443/swagger/index.html   # expect 401

[Unit]
Description=SSH tunnel pve-201->webzavod for flights.test.aeroflot.ru:443
Documentation=https://git.gnerim.ru/gnezim/Aeroflot.Flights.Web
After=network-online.target
Wants=network-online.target

[Service]
User=gnezim
Type=simple
# Restart the tunnel 10 s after any exit.
Restart=always
RestartSec=10
# -N: forward only, run no remote command.
# BatchMode: never prompt for input.
# ExitOnForwardFailure: exit if the 8443 listener cannot be set up, so the
#   Restart= policy above applies.
# ServerAlive*: give up on a dead peer after 3 missed 30 s probes.
ExecStart=/usr/bin/ssh -N \
    -o BatchMode=yes \
    -o ExitOnForwardFailure=yes \
    -o ServerAliveInterval=30 \
    -o ServerAliveCountMax=3 \
    -o StrictHostKeyChecking=accept-new \
    -L 127.0.0.1:8443:flights.test.aeroflot.ru:443 \
    gnezim@192.168.88.58

[Install]
WantedBy=multi-user.target
|
||||
@@ -485,3 +485,52 @@ When a private registry comes online (eventual `registry.gnerim.ru`), changes:
|
||||
2. **The 9 untracked `snap-*.yml` files at repo root** look like throwaway parity-snapshot artifacts. Add to `.gitignore` or commit? Verify before flipping pipeline on (prereq #14).
|
||||
3. **e2e portability to remote `BASE_URL`** — existing specs were written against localhost. Many likely hardcode paths or rely on dev-only state. Layer 2 of testing strategy budgets time for this.
|
||||
4. **Initial console-allowlist content** — empty starter; will be populated on first runs ("we'll figure it out in future" per design discussion).
|
||||
|
||||
---
|
||||
|
||||
## Addendum 2026-04-27 — routing change + manual Jenkins trigger
|
||||
|
||||
Two design pivots discovered during Phase B prerequisites work:
|
||||
|
||||
### Routing: ssh -L tunnel instead of static-route + NAT
|
||||
|
||||
Original design: static route on pve-201 pushes `<TIM-CIDR>` via webzavod's LAN IP, webzavod NATs LAN→ppp0, `/etc/hosts` pins `flights.test.aeroflot.ru` to an internal A record.
|
||||
|
||||
Discovered:
|
||||
- `flights.test.aeroflot.ru` resolves to public IPs from both pve-201 and webzavod (no internal A record exists).
|
||||
- pve-201 reaches the public IP directly with HTTP 200, **but the response is a WAF interstitial** — the customer WAF returns 200/HTML for non-corp egress and 401/JSON-ready for corp egress.
|
||||
- The same URL from webzavod returns 401 (real backend) — webzavod's `ppp0` egress IP is whitelisted.
|
||||
|
||||
New design: persistent `ssh -L 127.0.0.1:8443:flights.test.aeroflot.ru:443` from pve-201 to webzavod via systemd unit `deployment/systemd/flights-tim-tunnel.service`. nginx proxies `/api/` and `/map/api/` to `https://127.0.0.1:8443` with `Host` and `proxy_ssl_name` overrides so SNI/cert validation still target the real hostname.
|
||||
|
||||
Webzavod-side authorisation pinned with `command="exit 1",no-pty,no-X11-forwarding,no-agent-forwarding,no-user-rc,permitopen="flights.test.aeroflot.ru:443"` — the key cannot open a shell, agent-forward, or forward any other host:port.
|
||||
|
||||
Trade-offs vs. original:
|
||||
- ✅ No webzavod kernel changes (no `ip_forward` toggle, no MASQUERADE rule, no iptables-persistent).
|
||||
- ✅ No `/etc/hosts` pin needed (DNS resolution happens on webzavod, where the real IPs work).
|
||||
- ✅ Recoverable in seconds (`systemctl restart flights-tim-tunnel`).
|
||||
- ⚠ Per-host SSH tunnel — adding another upstream means another `-L` line. Currently only one upstream.
|
||||
- ⚠ OpenSSH 9.6 quirk discovered: combining `restrict` with `permitopen` in `authorized_keys` makes the forwarded TLS handshake hit EOF mid-stream. Listing the explicit `no-*` options instead of `restrict` works.
|
||||
|
||||
### Workflow B: drop Jenkins automation
|
||||
|
||||
Original design: Workflow B triggers Jenkins via remote-build token, polls build status via authenticated API, then runs e2e against customer URL.
|
||||
|
||||
Constraint: operator does not have Jenkins job-configure access (no remote-trigger token) nor Jenkins user API token access. Authenticated API trigger and polling are not possible without admin involvement.
|
||||
|
||||
New design:
|
||||
- **Workflow B (`release.yml`)** — sync to GitLab, open MR, auto-approve, auto-merge, **stop**. Telegram notify includes the Jenkins job URL with instructions to trigger by hand.
|
||||
- **Workflow C (`release-verify.yml`)** — `workflow_dispatch` only. Operator runs manually after Jenkins finishes. Probes customer URL until reachable, runs Playwright e2e against `http://flights-ui.devwebzavod.ru` with the console-error gate, notifies Telegram.
|
||||
|
||||
Removed from the repo:
|
||||
- `scripts/ci/jenkins-trigger-and-wait.sh`
|
||||
- `tests/ci/test-jenkins-trigger.sh`
|
||||
- `tests/ci/fixtures/jenkins-{success,failure}-flow.json`
|
||||
- `JENKINS_USER`, `JENKINS_API_TOKEN`, `JENKINS_TRIGGER_TOKEN` secrets
|
||||
|
||||
Trade-off: we lose the fully automated end-to-end pipeline. This is acceptable because (a) the operator already triggers Jenkins manually today, (b) the manual step is a checkpoint where build failures surface clearly, and (c) if Jenkins API access is granted later, Workflow C's steps can be folded back into Workflow B without changing the rest of the design.
|
||||
|
||||
### Other small adjustments
|
||||
|
||||
- SSR container loopback port changed from `8081` → `3002` (port 8081 already in use on pve-201 by openwebui).
|
||||
- `notify-telegram.sh` now skips cleanly when Telegram secrets are unset (was: hard-fail). Lets the pipeline run end-to-end without TG configured.
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#
|
||||
# Env:
|
||||
# GITHUB_SHA (required for swap)
|
||||
# FLIGHTS_WEB_PORT (default 8081 — host port that nginx proxies to)
|
||||
# FLIGHTS_WEB_PORT (default 3002 — host port that nginx proxies to)
|
||||
# IMAGE_NAME (default flights-web — set this to point at a registry later)
|
||||
set -euo pipefail
|
||||
|
||||
@@ -20,7 +20,7 @@ if [ "${1:-}" = "--dry-run" ]; then
|
||||
fi
|
||||
|
||||
CMD="${1:-}"
|
||||
PORT="${FLIGHTS_WEB_PORT:-8081}"
|
||||
PORT="${FLIGHTS_WEB_PORT:-3002}"
|
||||
IMAGE="${IMAGE_NAME:-flights-web}"
|
||||
|
||||
run() {
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
#!/usr/bin/env bash
# jenkins-trigger-and-wait.sh — fire a Jenkins job and wait for completion.
#
# Usage:
#   jenkins-trigger-and-wait.sh                              # real mode (env-driven)
#   jenkins-trigger-and-wait.sh --mock-mode <fixture.json>   # for tests
#
# Env (real mode):
#   JENKINS_BASE_URL        e.g. http://jenkins.yc.devwebzavod.ru:8080
#   JENKINS_JOB_PATH        e.g. /job/Aeroflot2/job/Flights-Front-Dev
#   JENKINS_USER, JENKINS_API_TOKEN
#   JENKINS_TRIGGER_TOKEN
#   JENKINS_TIMEOUT         seconds (default 1800), shared by queue + build phases
#   JENKINS_POLL_INTERVAL   seconds (default 10)
#
# Exit status:
#   0  build finished with result SUCCESS
#   1  build finished with any other result, timed out, or Jenkins gave no
#      queue location (a failing curl/jq call also aborts via `set -e`)
#   2  usage error or a required tool (jq, curl) is missing
set -euo pipefail

MODE=real
FIXTURE=""
if [ "${1:-}" = "--mock-mode" ]; then
  MODE=mock
  FIXTURE="${2:-}"
  [ -n "$FIXTURE" ] || { echo "usage: $0 --mock-mode <fixture.json>" >&2; exit 2; }
  command -v jq >/dev/null 2>&1 || { echo "fatal: jq required for --mock-mode" >&2; exit 2; }
fi

POLL_INTERVAL="${JENKINS_POLL_INTERVAL:-10}"
TIMEOUT="${JENKINS_TIMEOUT:-1800}"

if [ "$MODE" = real ]; then
  : "${JENKINS_BASE_URL:?required}"
  : "${JENKINS_JOB_PATH:?required}"
  : "${JENKINS_USER:?required}"
  : "${JENKINS_API_TOKEN:?required}"
  : "${JENKINS_TRIGGER_TOKEN:?required}"
  # Real mode parses every API response with jq and talks to Jenkins via curl;
  # fail up front with a clear message instead of mid-run.
  command -v jq >/dev/null 2>&1 || { echo "fatal: jq required" >&2; exit 2; }
  command -v curl >/dev/null 2>&1 || { echo "fatal: curl required" >&2; exit 2; }
fi

# ── Mock mode: walk fixture deterministically ─────────────────────────────────
if [ "$MODE" = mock ]; then
  QUEUE_URL=$(jq -r '.trigger_response.headers.Location' "$FIXTURE")
  echo "triggered (mock): queue=$QUEUE_URL"

  # Walk queue polls until we get an executable.
  count=$(jq '.queue_polls | length' "$FIXTURE")
  BUILD_URL=""
  for i in $(seq 0 $((count - 1))); do
    body=$(jq -c ".queue_polls[$i].body" "$FIXTURE")
    exe_url=$(printf '%s' "$body" | jq -r '.executable.url // empty')
    if [ -n "$exe_url" ]; then
      BUILD_URL="$exe_url"
      break
    fi
    echo "queue poll $((i + 1)): not yet"
  done
  [ -n "${BUILD_URL:-}" ] || { echo "fatal: queue never produced executable" >&2; exit 1; }
  echo "build url (mock): $BUILD_URL"

  # Walk build polls until result != null.
  count=$(jq '.build_polls | length' "$FIXTURE")
  for i in $(seq 0 $((count - 1))); do
    body=$(jq -c ".build_polls[$i].body" "$FIXTURE")
    result=$(printf '%s' "$body" | jq -r '.result // empty')
    number=$(printf '%s' "$body" | jq -r '.number')
    if [ -n "$result" ]; then
      if [ "$result" = "SUCCESS" ]; then
        echo "build #${number} SUCCESS"
        exit 0
      else
        echo "build #${number} ${result}" >&2
        exit 1
      fi
    fi
    echo "build poll $((i + 1)): building"
  done
  echo "fatal: build never completed within fixture" >&2
  exit 1
fi

# ── Real mode ─────────────────────────────────────────────────────────────────
TRIGGER_URL="${JENKINS_BASE_URL}${JENKINS_JOB_PATH}/build?token=${JENKINS_TRIGGER_TOKEN}"
# Log the endpoint WITHOUT the query string: the trigger token is a secret and
# must not end up in CI logs.
echo "triggering: ${JENKINS_BASE_URL}${JENKINS_JOB_PATH}/build"

# -D - dumps headers; -o /dev/null discards body. We need the Location header.
HEADERS=$(curl -fsS -X POST -u "${JENKINS_USER}:${JENKINS_API_TOKEN}" -D - -o /dev/null "$TRIGGER_URL")
# Take the first Location header, matching the name case-insensitively. awk
# exits 0 even when nothing matches, so under `pipefail` a missing header
# reaches the explicit check below instead of killing the script silently
# (which is what a non-matching grep would do).
QUEUE_URL=$(printf '%s\n' "$HEADERS" \
  | awk '!found && tolower($0) ~ /^location:/ { sub(/^[^:]*:[[:space:]]*/, ""); print; found = 1 }' \
  | tr -d '\r\n')
[ -n "$QUEUE_URL" ] || { echo "fatal: no Location header from Jenkins" >&2; exit 1; }
# "api/json" is appended directly, so make sure there is exactly one trailing slash.
QUEUE_URL="${QUEUE_URL%/}/"
echo "queue: $QUEUE_URL"

# Poll queue for executable.url. START covers both queue + build phases.
START=$(date +%s)
BUILD_URL=""
while [ -z "$BUILD_URL" ]; do
  resp=$(curl -fsS -u "${JENKINS_USER}:${JENKINS_API_TOKEN}" "${QUEUE_URL}api/json")
  BUILD_URL=$(printf '%s' "$resp" | jq -r '.executable.url // empty')
  [ -n "$BUILD_URL" ] && break
  now=$(date +%s)
  if [ $((now - START)) -ge "$TIMEOUT" ]; then
    echo "fatal: queue timeout after ${TIMEOUT}s" >&2
    exit 1
  fi
  sleep "$POLL_INTERVAL"
done
# Same trailing-slash normalisation as for the queue URL.
BUILD_URL="${BUILD_URL%/}/"
echo "build: $BUILD_URL"

# Poll build for result. Timeout window is shared with queue phase (START not reset).
while :; do
  resp=$(curl -fsS -u "${JENKINS_USER}:${JENKINS_API_TOKEN}" "${BUILD_URL}api/json")
  result=$(printf '%s' "$resp" | jq -r '.result // empty')
  number=$(printf '%s' "$resp" | jq -r '.number')
  if [ -n "$result" ]; then
    if [ "$result" = "SUCCESS" ]; then
      echo "build #${number} SUCCESS"
      exit 0
    else
      echo "build #${number} ${result} — see ${BUILD_URL}console" >&2
      exit 1
    fi
  fi
  now=$(date +%s)
  if [ $((now - START)) -ge "$TIMEOUT" ]; then
    echo "fatal: build timeout after ${TIMEOUT}s — see ${BUILD_URL}console" >&2
    exit 1
  fi
  sleep "$POLL_INTERVAL"
done
|
||||
@@ -28,8 +28,10 @@ esac
|
||||
[ -n "$STAGE" ] || { echo "usage: $0 [--dry-run] <start|ok|fail> <stage> [<extra-context>]" >&2; exit 2; }
|
||||
|
||||
if [ "$DRY_RUN" -eq 0 ]; then
|
||||
: "${TELEGRAM_BOT_TOKEN:?TELEGRAM_BOT_TOKEN required}"
|
||||
: "${TELEGRAM_CHAT_ID:?TELEGRAM_CHAT_ID required}"
|
||||
if [ -z "${TELEGRAM_BOT_TOKEN:-}" ] || [ -z "${TELEGRAM_CHAT_ID:-}" ]; then
|
||||
echo "notify-telegram: TELEGRAM_BOT_TOKEN/TELEGRAM_CHAT_ID unset — skipping" >&2
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
REPO="${GITHUB_REPOSITORY:-unknown/repo}"
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
{
|
||||
"trigger_response": {
|
||||
"status": 201,
|
||||
"headers": {
|
||||
"Location": "http://jenkins.test/queue/item/78/"
|
||||
}
|
||||
},
|
||||
"queue_polls": [
|
||||
{"status": 200, "body": {"executable": {"number": 43, "url": "http://jenkins.test/job/Aeroflot2/job/Flights-Front-Dev/43/"}}}
|
||||
],
|
||||
"build_polls": [
|
||||
{"status": 200, "body": {"building": true, "result": null, "number": 43}},
|
||||
{"status": 200, "body": {"building": false, "result": "FAILURE", "number": 43}}
|
||||
]
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
{
|
||||
"trigger_response": {
|
||||
"status": 201,
|
||||
"headers": {
|
||||
"Location": "http://jenkins.test/queue/item/77/"
|
||||
}
|
||||
},
|
||||
"queue_polls": [
|
||||
{"status": 200, "body": {"why": "in queue", "executable": null}},
|
||||
{"status": 200, "body": {"why": "in queue", "executable": null}},
|
||||
{"status": 200, "body": {"executable": {"number": 42, "url": "http://jenkins.test/job/Aeroflot2/job/Flights-Front-Dev/42/"}}}
|
||||
],
|
||||
"build_polls": [
|
||||
{"status": 200, "body": {"building": true, "result": null, "number": 42}},
|
||||
{"status": 200, "body": {"building": true, "result": null, "number": 42}},
|
||||
{"status": 200, "body": {"building": false, "result": "SUCCESS", "number": 42}}
|
||||
]
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
#!/usr/bin/env bash
# Smoke test for scripts/ci/jenkins-trigger-and-wait.sh driven by mock-mode fixtures.
# Checks: the success fixture exits 0 and prints "build #42 SUCCESS"; the failure
# fixture exits non-zero and mentions FAILURE; running with no arguments fails.
|
||||
set -euo pipefail
|
||||
|
||||
ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
|
||||
SCRIPT="$ROOT/scripts/ci/jenkins-trigger-and-wait.sh"
|
||||
[ -x "$SCRIPT" ] || { echo "FAIL: $SCRIPT not executable"; exit 1; }
|
||||
|
||||
# Mock-mode tests need jq — bail with a useful message if unavailable.
|
||||
command -v jq >/dev/null 2>&1 || { echo "SKIP: jq not installed"; exit 0; }
|
||||
|
||||
# Capture output in a private temp file instead of a fixed /tmp path, so that
# concurrent runs cannot clobber each other; remove it on any exit.
|
||||
LOG="$(mktemp "${TMPDIR:-/tmp}/jenkins-test.XXXXXX")"
|
||||
trap 'rm -f "$LOG"' EXIT
|
||||
|
||||
# --- success path ---
|
||||
# pipefail (set above) makes the pipeline report the script's status, not tee's.
if ! "$SCRIPT" --mock-mode "$ROOT/tests/ci/fixtures/jenkins-success-flow.json" 2>&1 | tee "$LOG"; then
|
||||
echo "FAIL: success fixture should exit 0"
|
||||
exit 1
|
||||
fi
|
||||
grep -q "build #42 SUCCESS" "$LOG" || { echo "FAIL: expected 'build #42 SUCCESS'"; exit 1; }
|
||||
|
||||
# --- failure path ---
|
||||
if "$SCRIPT" --mock-mode "$ROOT/tests/ci/fixtures/jenkins-failure-flow.json" 2>&1 | tee "$LOG"; then
|
||||
echo "FAIL: failure fixture should exit non-zero"
|
||||
exit 1
|
||||
fi
|
||||
grep -q "FAILURE" "$LOG" || { echo "FAIL: expected 'FAILURE' in output"; exit 1; }
|
||||
|
||||
# --- bad usage ---
|
||||
# No arguments at all must be rejected with a non-zero exit.
if "$SCRIPT" 2>/dev/null; then
|
||||
echo "FAIL: expected usage error"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "PASS: jenkins-trigger-and-wait.sh"
|
||||
@@ -37,12 +37,18 @@ out=$("$SCRIPT" --dry-run fail ci-deploy "Run Playwright e2e")
|
||||
assert_contains "$out" "❌ ci-deploy FAILED"
|
||||
assert_contains "$out" "Run Playwright e2e"
|
||||
|
||||
# --- missing env should error in non-dry-run ---
|
||||
# --- missing env in non-dry-run: should skip cleanly (exit 0, log to stderr) ---
|
||||
unset TELEGRAM_BOT_TOKEN
|
||||
if "$SCRIPT" ok ci-deploy 2>/dev/null; then
|
||||
echo "FAIL: expected error when TELEGRAM_BOT_TOKEN missing"
|
||||
set +e
|
||||
err=$("$SCRIPT" ok ci-deploy 2>&1 >/dev/null)
|
||||
rc=$?
|
||||
set -e
|
||||
if [ $rc -ne 0 ]; then
|
||||
echo "FAIL: expected exit 0 when TELEGRAM_BOT_TOKEN missing (got $rc)"
|
||||
exit 1
|
||||
fi
|
||||
assert_contains "$err" "skipping"
|
||||
export TELEGRAM_BOT_TOKEN="test-token"
|
||||
|
||||
|
||||
# --- fail with log tail ---
|
||||
|
||||
Reference in New Issue
Block a user