Files
clawsec/scripts/i18n/link_check.py
T
David Abutbul b37162a33d feat(i18n): add multilingual wiki scaffolding, language switcher, and… (#212)
* feat(i18n): add multilingual wiki scaffolding, language switcher, and translation QA pipeline

* docs(readme): adopt picoclaw-style multilingual link bar

* fix(i18n): repair localized index links and tighten partial-pair QA

* ci(i18n): fail on broken markdown links in README/wiki

* ci(i18n): add changed-files mode for markdown link checks

* i18n(de): use local Argos MT to fill untranslated German sections

* i18n(es,fr): fill untranslated sections via local Argos workflow

* i18n(ja): fill untranslated sections with scoped local Argos pass

* i18n(ko): fill untranslated sections with scoped local Argos pass

* fix(i18n): address review feedback

---------

Co-authored-by: David Abutbul <David.a@prompt.security>
2026-04-29 09:00:31 +03:00

99 lines
2.9 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import re
import subprocess
from pathlib import Path
ROOT = Path(__file__).resolve().parents[2]
MARKDOWN_LINK_RE = re.compile(r"\[[^\]]+\]\(([^)]+)\)")
def _all_docs() -> set[Path]:
files = set(ROOT.glob("README*.md"))
files.update(ROOT.glob("wiki/**/*.md"))
return {p for p in files if p.is_file()}
def _changed_docs(base_ref: str) -> set[Path]:
cmd = ["git", "diff", "--name-only", f"{base_ref}...HEAD"]
output = subprocess.check_output(cmd, cwd=ROOT, text=True)
docs: set[Path] = set()
for rel in output.splitlines():
if not rel.endswith(".md"):
continue
p = (ROOT / rel).resolve()
if p in _all_docs() and p.exists():
docs.add(p)
return docs
def _should_skip(link: str) -> bool:
return (
not link
or link.startswith("#")
or "://" in link
or link.startswith("mailto:")
or link.startswith("tel:")
)
def _resolve_target(doc: Path, link: str) -> Path:
clean = link.split("#", 1)[0].strip()
return (doc.parent / clean).resolve()
def _select_docs(changed_only: bool, base_ref: str) -> list[Path]:
all_docs = _all_docs()
if not changed_only:
return sorted(all_docs)
try:
changed = _changed_docs(base_ref)
except Exception as exc: # noqa: BLE001
print(f"[link-check] WARN: changed-only mode failed ({exc}); falling back to full scan")
return sorted(all_docs)
if not changed:
print("[link-check] No changed markdown docs detected; nothing to check.")
return []
return sorted(changed)
def main() -> int:
parser = argparse.ArgumentParser(description="Check local markdown links in README/wiki docs")
parser.add_argument("--changed-only", action="store_true", help="Check only changed markdown docs against a base ref")
parser.add_argument("--base-ref", default="origin/main", help="Base ref for --changed-only (default: origin/main)")
args = parser.parse_args()
docs = _select_docs(args.changed_only, args.base_ref)
failures: list[str] = []
for doc in docs:
rel_doc = doc.relative_to(ROOT)
content = doc.read_text(encoding="utf-8")
for match in MARKDOWN_LINK_RE.finditer(content):
raw_link = match.group(1).strip()
if _should_skip(raw_link):
continue
target = _resolve_target(doc, raw_link)
if not target.exists():
failures.append(f"{rel_doc}: broken link -> {raw_link}")
if failures:
print(f"[link-check] FAILED: {len(failures)} broken link(s) found.")
for item in failures:
print(f" - {item}")
return 1
scope = "changed docs" if args.changed_only else "all docs"
print(f"[link-check] PASS: no broken local markdown links found ({scope}).")
return 0
if __name__ == "__main__":
raise SystemExit(main())