Files
clawsec/scripts/populate-local-feed.sh
T
davida-ps e0eae65586 refactor(ci): extract shared exploitability enrichment helper (#95)
* refactor(ci): share exploitability enrichment script

* refactor(scripts): reuse shared exploitability enricher in local feed
2026-03-01 21:50:10 +02:00

387 lines
12 KiB
Bash
Executable File

#!/bin/bash
# populate-local-feed.sh
# Polls NVD API for real CVE data and populates local advisory feed for development preview.
# This mirrors the GitHub Actions pipeline logic exactly.
#
# Usage: ./scripts/populate-local-feed.sh [--days N] [--force]
# --days N Look back N days (default: 120)
# --force Ignore existing advisories and fetch all
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# shellcheck source=./feed-utils.sh
source "$SCRIPT_DIR/feed-utils.sh"
# Configuration - same as pipeline
init_feed_paths "$PROJECT_ROOT"
KEYWORDS="OpenClaw clawdbot Moltbot NanoClaw WhatsApp-bot baileys"
GITHUB_REF_PATTERN="github.com/openclaw/openclaw github.com/qwibitai/NanoClaw"
ENRICH_SCRIPT="$PROJECT_ROOT/scripts/ci/enrich_exploitability.sh"
# Parse args
DAYS_BACK=120
FORCE=false
while [[ $# -gt 0 ]]; do
case $1 in
--days)
DAYS_BACK="$2"
shift 2
;;
--force)
FORCE=true
shift
;;
*)
echo "Unknown option: $1"
exit 1
;;
esac
done
echo "=== ClawSec Local Feed Populator ==="
echo "Project root: $PROJECT_ROOT"
echo "Days back: $DAYS_BACK"
echo "Force mode: $FORCE"
echo ""
# Verify enrichment helper exists (it validates Python/analyzer prerequisites internally).
if [ ! -x "$ENRICH_SCRIPT" ]; then
echo "Error: Exploitability enrichment helper not found or not executable: $ENRICH_SCRIPT"
exit 1
fi
# Create temp directory
TEMP_DIR=$(mktemp -d)
trap 'rm -rf "$TEMP_DIR"' EXIT
# Determine date window
if [ -f "$FEED_PATH" ] && [ "$FORCE" = "false" ]; then
LAST_UPDATED=$(jq -r '.updated // empty' "$FEED_PATH")
if [ -n "$LAST_UPDATED" ]; then
START_DATE="$LAST_UPDATED"
echo "Using last updated from feed: $START_DATE"
fi
fi
if [ -z "${START_DATE:-}" ]; then
# macOS vs Linux date compatibility
if date -v-1d > /dev/null 2>&1; then
START_DATE=$(date -u -v-"${DAYS_BACK}"d +%Y-%m-%dT%H:%M:%S.000Z)
else
START_DATE=$(date -u -d "${DAYS_BACK} days ago" +%Y-%m-%dT%H:%M:%S.000Z)
fi
echo "Using default start date: $START_DATE"
fi
END_DATE=$(date -u +%Y-%m-%dT%H:%M:%S.000Z)
echo "End date: $END_DATE"
echo ""
# URL encode dates
START_ENC=${START_DATE//:/%3A}
END_ENC=${END_DATE//:/%3A}
echo "=== Fetching CVEs from NVD ==="
for KEYWORD in $KEYWORDS; do
echo "Fetching keyword: $KEYWORD"
URL="https://services.nvd.nist.gov/rest/json/cves/2.0?keywordSearch=${KEYWORD}&lastModStartDate=${START_ENC}&lastModEndDate=${END_ENC}"
# Fetch with retry logic
for i in 1 2 3; do
HTTP_CODE=$(curl -s -w "%{http_code}" -o "$TEMP_DIR/nvd_${KEYWORD}.json" "$URL")
if [ "$HTTP_CODE" = "200" ]; then
COUNT=$(jq '.vulnerabilities | length // 0' "$TEMP_DIR/nvd_${KEYWORD}.json" 2>/dev/null || echo 0)
echo " ✓ Found $COUNT CVEs"
break
elif [ "$HTTP_CODE" = "403" ] || [ "$HTTP_CODE" = "429" ]; then
echo " Rate limited, waiting 30s before retry $i..."
sleep 30
else
echo " HTTP $HTTP_CODE, retry $i..."
sleep 5
fi
done
# NVD recommends 6 second delay between requests
echo " Waiting 6s (NVD rate limit)..."
sleep 6
done
echo ""
echo "=== Processing CVEs ==="
# Combine all fetched CVEs
echo '{"vulnerabilities":[]}' > "$TEMP_DIR/combined.json"
for KEYWORD in $KEYWORDS; do
FILE="$TEMP_DIR/nvd_${KEYWORD}.json"
if [ -f "$FILE" ] && [ -s "$FILE" ]; then
if jq -e '.vulnerabilities' "$FILE" > /dev/null 2>&1; then
jq -s '.[0].vulnerabilities += .[1].vulnerabilities | .[0]' \
"$TEMP_DIR/combined.json" "$FILE" > "$TEMP_DIR/combined_new.json"
mv "$TEMP_DIR/combined_new.json" "$TEMP_DIR/combined.json"
fi
fi
done
# Deduplicate by CVE ID
jq '.vulnerabilities | unique_by(.cve.id)' "$TEMP_DIR/combined.json" > "$TEMP_DIR/unique_cves.json"
TOTAL=$(jq 'length' "$TEMP_DIR/unique_cves.json")
echo "Total unique CVEs from NVD: $TOTAL"
# Post-filter: keep only CVEs matching our criteria
KEYWORDS_PATTERN="OpenClaw|clawdbot|Moltbot|openclaw|NanoClaw|nanoclaw|WhatsApp-bot|baileys"
jq --arg kw "$KEYWORDS_PATTERN" --arg gh "$GITHUB_REF_PATTERN" '
[.[] | select(
(.cve.descriptions[]? | select(.lang == "en") | .value | test($kw; "i"))
or
(.cve.references[]? | .url | test($gh; "i"))
)]
' "$TEMP_DIR/unique_cves.json" > "$TEMP_DIR/filtered_cves.json"
FILTERED=$(jq 'length' "$TEMP_DIR/filtered_cves.json")
echo "Filtered CVEs (matching criteria): $FILTERED"
# Get existing advisory IDs (unless force mode)
if [ "$FORCE" = "true" ]; then
echo "Force mode: ignoring existing advisory IDs during transform"
EXISTING_IDS=""
elif [ -f "$FEED_PATH" ]; then
EXISTING_IDS=$(jq -r '.advisories[]?.id // empty' "$FEED_PATH" | sort -u)
else
EXISTING_IDS=""
fi
# Transform CVEs to our advisory format (same logic as pipeline)
EXISTING_JSON=$(echo "$EXISTING_IDS" | jq -R -s 'split("\n") | map(select(length > 0))')
jq --argjson existing "$EXISTING_JSON" '
def map_severity:
if . == null then "medium"
elif . >= 9.0 then "critical"
elif . >= 7.0 then "high"
elif . >= 4.0 then "medium"
else "low"
end;
def get_cvss_score:
.cve.metrics.cvssMetricV31[0]?.cvssData.baseScore //
.cve.metrics.cvssMetricV30[0]?.cvssData.baseScore //
.cve.metrics.cvssMetricV2[0]?.cvssData.baseScore //
null;
def nvd_category_raw:
(
[.cve.weaknesses[]?.description[]? | select(.lang == "en") | .value | strings | select(length > 0)]
| unique
| map(select(. != "NVD-CWE-noinfo" and . != "NVD-CWE-Other"))
| .[0]
);
def cwe_id:
(
nvd_category_raw
| if . == null then null
else (try (capture("^CWE-(?<id>[0-9]+)$").id) catch null)
end
);
def cwe_name_map($id):
({
"20": "improper_input_validation",
"22": "path_traversal",
"77": "command_injection",
"78": "os_command_injection",
"79": "cross_site_scripting",
"89": "sql_injection",
"94": "code_injection",
"119": "memory_buffer_bounds_violation",
"120": "classic_buffer_overflow",
"125": "out_of_bounds_read",
"134": "format_string_vulnerability",
"200": "exposure_of_sensitive_information",
"250": "execution_with_unnecessary_privileges",
"269": "improper_privilege_management",
"284": "improper_access_control",
"285": "improper_authorization",
"287": "improper_authentication",
"295": "improper_certificate_validation",
"306": "missing_authentication_for_critical_function",
"319": "cleartext_transmission_of_sensitive_information",
"326": "inadequate_encryption_strength",
"327": "risky_cryptographic_algorithm",
"352": "cross_site_request_forgery",
"362": "race_condition",
"400": "uncontrolled_resource_consumption",
"416": "use_after_free",
"434": "unrestricted_file_upload",
"502": "deserialization_of_untrusted_data",
"601": "open_redirect",
"611": "xml_external_entity_injection",
"639": "insecure_direct_object_reference",
"668": "exposure_of_resource_to_wrong_sphere",
"669": "incorrect_resource_transfer_between_spheres",
"732": "incorrect_permission_assignment",
"787": "out_of_bounds_write",
"798": "hard_coded_credentials",
"862": "missing_authorization",
"863": "incorrect_authorization",
"918": "server_side_request_forgery",
"922": "insecure_storage_of_sensitive_information"
}[$id]);
def nvd_category_name:
(
cwe_id as $id
| if $id == null then "unspecified_weakness"
else (cwe_name_map($id) // ("unknown_cwe_" + $id))
end
);
def cpe_criteria:
(
[.cve.configurations[]? | .. | objects | .criteria? | strings | select(startswith("cpe:2.3:"))]
| unique
);
def inferred_targets:
(
(
[
(.cve.descriptions[]? | select(.lang == "en") | .value),
(.cve.references[]?.url // empty)
]
| map(strings | ascii_downcase)
| join(" ")
) as $blob
| (
(if ($blob | test("github\\.com/openclaw/openclaw|\\bopenclaw\\b|\\bclawdbot\\b|\\bmoltbot\\b")) then ["openclaw@*"] else [] end)
+ (if ($blob | test("github\\.com/qwibitai/nanoclaw|\\bnanoclaw\\b|whatsapp-bot|\\bbaileys\\b")) then ["nanoclaw@*"] else [] end)
)
);
def normalized_affected:
(
(cpe_criteria + inferred_targets)
| unique
| .[0:5]
| if length == 0 then ["openclaw@*", "nanoclaw@*"] else . end
);
[.[] |
select(.cve.id as $id | $existing | index($id) | not) |
{
id: .cve.id,
severity: (get_cvss_score | map_severity),
type: nvd_category_name,
nvd_category_id: nvd_category_raw,
title: (.cve.descriptions[] | select(.lang == "en") | .value | .[0:100] + (if length > 100 then "..." else "" end)),
description: (.cve.descriptions[] | select(.lang == "en") | .value),
affected: normalized_affected,
action: "Review and update affected components. See NVD for remediation details.",
published: .cve.published,
references: [.cve.references[]?.url // empty] | unique | .[0:3],
cvss_score: get_cvss_score,
nvd_url: ("https://nvd.nist.gov/vuln/detail/" + .cve.id),
exploitability_score: null,
exploitability_rationale: null
}
]
' "$TEMP_DIR/filtered_cves.json" > "$TEMP_DIR/new_advisories.json"
NEW_COUNT=$(jq 'length' "$TEMP_DIR/new_advisories.json")
echo "New advisories to add: $NEW_COUNT"
if [ "$NEW_COUNT" -eq 0 ]; then
echo ""
echo "No new CVEs found. Feed is up to date."
echo "Use --force to re-fetch all CVEs regardless of existing entries."
exit 0
fi
echo ""
echo "=== Analyzing Exploitability ==="
# Build CVSS vector lookup for enriched analysis inputs.
jq '
[.[] | {
id: .cve.id,
cvss_vector: (
.cve.metrics.cvssMetricV31[0]?.cvssData.vectorString //
.cve.metrics.cvssMetricV30[0]?.cvssData.vectorString //
.cve.metrics.cvssMetricV2[0]?.vectorString //
""
)
}] | map({(.id): .cvss_vector}) | add
' "$TEMP_DIR/filtered_cves.json" > "$TEMP_DIR/cvss_vectors.json"
"$ENRICH_SCRIPT" \
--mode batch \
--input "$TEMP_DIR/new_advisories.json" \
--output "$TEMP_DIR/new_advisories.json" \
--cvss-vectors "$TEMP_DIR/cvss_vectors.json"
echo ""
echo "=== New Advisories ==="
jq -r '.[] | " \(.id) [\(.severity)] - \(.title)"' "$TEMP_DIR/new_advisories.json"
echo ""
echo "=== Updating Feeds ==="
NOW=$(date -u +%Y-%m-%dT%H:%M:%SZ)
# Merge new advisories into existing feed
if [ -f "$FEED_PATH" ]; then
jq --argjson new "$(cat "$TEMP_DIR/new_advisories.json")" --arg now "$NOW" '
.updated = $now |
# Merge by advisory ID so force mode can refresh existing CVEs without duplicates
.advisories = (
reduce (.advisories + $new)[] as $adv
({};
if ($adv.id // "") == "" then
.
else
.[$adv.id] = $adv
end
)
| [.[]]
| sort_by(.published)
| reverse
)
' "$FEED_PATH" > "$TEMP_DIR/updated_feed.json"
else
jq -n --argjson advisories "$(cat "$TEMP_DIR/new_advisories.json")" --arg now "$NOW" '{
version: "1.0.0",
updated: $now,
description: "Community-driven security advisory feed for ClawSec. Automatically updated with OpenClaw and NanoClaw-related CVEs from NVD.",
advisories: ($advisories | sort_by(.published) | reverse)
}' > "$TEMP_DIR/updated_feed.json"
fi
# Validate and save
if jq empty "$TEMP_DIR/updated_feed.json" 2>/dev/null; then
# Update main feed
cp "$TEMP_DIR/updated_feed.json" "$FEED_PATH"
echo "✓ Updated: $FEED_PATH"
# Sync feed mirrors for local skill/public consumers.
sync_feed_to_mirrors "$FEED_PATH" "create"
echo ""
TOTAL_ADVISORIES=$(jq '.advisories | length' "$FEED_PATH")
echo "=== Summary ==="
echo "Total advisories in feed: $TOTAL_ADVISORIES"
echo "New advisories added: $NEW_COUNT"
echo ""
echo "Run 'npm run dev' to preview the feed in the local site."
else
echo "Error: Generated invalid JSON"
exit 1
fi