refactor(ci): extract shared exploitability enrichment helper (#95)

* refactor(ci): share exploitability enrichment script
* refactor(scripts): reuse shared exploitability enricher in local feed
2026-06-13 05:28:02 +03:00 · 2026-03-01 21:50:10 +02:00
parent 56a36b7e52
commit e0eae65586
4 changed files with 283 additions and 193 deletions
@@ -206,60 +206,13 @@ jobs:
          echo "=== Analyzing exploitability for community advisory ==="
-          # Extract fields from advisory for analysis
+          scripts/ci/enrich_exploitability.sh \
-          CVE_ID=$(jq -r '.id' tmp_advisory.json)
+            --mode single \
-          SEVERITY=$(jq -r '.severity // "medium"' tmp_advisory.json)
+            --input tmp_advisory.json \
-          VULN_TYPE=$(jq -r '.type // "unknown"' tmp_advisory.json)
+            --output tmp_advisory.json
          DESCRIPTION=$(jq -r '.description // ""' tmp_advisory.json)
          REFERENCES=$(jq -c '.references // []' tmp_advisory.json)
-          # Map severity to approximate CVSS score for analysis
+          echo "=== Exploitability analysis complete ==="
-          case "$SEVERITY" in
+          echo "Exploitability score: $(jq -r '.exploitability_score // "unknown"' tmp_advisory.json)"
            critical) CVSS_SCORE=9.5 ;;
            high) CVSS_SCORE=7.5 ;;
            medium) CVSS_SCORE=5.5 ;;
            low) CVSS_SCORE=3.0 ;;
            *) CVSS_SCORE=5.0 ;;
          esac
          # Build input JSON for analyzer
          INPUT_JSON=$(jq -n \
            --arg cve_id "$CVE_ID" \
            --argjson cvss_score "$CVSS_SCORE" \
            --arg type "$VULN_TYPE" \
            --arg description "$DESCRIPTION" \
            --argjson references "$REFERENCES" \
            '{
              cve_id: $cve_id,
              cvss_score: $cvss_score,
              type: $type,
              description: $description,
              references: $references
            }')
          # Run exploitability analysis with exploit detection.
          # Continue without enrichment if analysis fails.
          if ANALYSIS=$(echo "$INPUT_JSON" | python utils/analyze_exploitability.py --json --check-exploits 2>/dev/null); then
            echo "$ANALYSIS" > tmp_exploitability.json
            echo "✓ Analyzed $CVE_ID"
            # Merge exploitability analysis into advisory
            jq --slurpfile analysis tmp_exploitability.json '
              . + {
                exploitability_score: $analysis[0].exploitability_score,
                exploitability_rationale: $analysis[0].exploitability_rationale,
                attack_vector_analysis: $analysis[0].attack_vector_analysis,
                exploit_detection: $analysis[0].exploit_detection
              }
            ' tmp_advisory.json > tmp_advisory_enriched.json
            mv tmp_advisory_enriched.json tmp_advisory.json
            echo "=== Exploitability analysis complete ==="
            echo "Exploitability score: $(jq -r '.exploitability_score // "unknown"' tmp_advisory.json)"
          else
            echo "::warning::Failed to analyze exploitability for $CVE_ID, continuing without enrichment"
          fi
      - name: Update feed
        if: steps.parse.outputs.already_exists != 'true'
@@ -684,68 +684,11 @@ jobs:
            }] | map({(.id): .cvss_vector}) | add
          ' tmp/filtered_cves.json > tmp/cvss_vectors.json
-          # Process each advisory through exploitability analyzer
+          scripts/ci/enrich_exploitability.sh \
-          jq -c '.[]' tmp/new_advisories.json | while IFS= read -r advisory; do
+            --mode batch \
-            CVE_ID=$(echo "$advisory" | jq -r '.id')
+            --input tmp/new_advisories.json \
-            CVSS_SCORE=$(echo "$advisory" | jq -r '.cvss_score // 0')
+            --output tmp/new_advisories.json \
-            CVSS_VECTOR=$(jq -r --arg id "$CVE_ID" '.[$id] // ""' tmp/cvss_vectors.json)
+            --cvss-vectors tmp/cvss_vectors.json
            VULN_TYPE=$(echo "$advisory" | jq -r '.type // ""')
            DESCRIPTION=$(echo "$advisory" | jq -r '.description // ""')
            REFERENCES=$(echo "$advisory" | jq -c '.references // []')
            # Build input JSON for analyzer
            INPUT_JSON=$(jq -n \
              --arg cve_id "$CVE_ID" \
              --argjson cvss_score "$CVSS_SCORE" \
              --arg cvss_vector "$CVSS_VECTOR" \
              --arg type "$VULN_TYPE" \
              --arg description "$DESCRIPTION" \
              --argjson references "$REFERENCES" \
              '{
                cve_id: $cve_id,
                cvss_score: $cvss_score,
                cvss_vector: $cvss_vector,
                type: $type,
                description: $description,
                references: $references
              }')
            # Run exploitability analysis with exploit detection.
            # Keep processing if any single advisory analysis fails.
            if ANALYSIS=$(echo "$INPUT_JSON" | python utils/analyze_exploitability.py --json --check-exploits 2>/dev/null); then
              echo "$ANALYSIS" > "tmp/exploitability_${CVE_ID}.json"
              echo "✓ Analyzed $CVE_ID"
            else
              echo "::warning::Failed to analyze exploitability for $CVE_ID, skipping enrichment"
            fi
          done
          # Merge exploitability analysis back into advisories.
          if ls tmp/exploitability_*.json >/dev/null 2>&1; then
            jq -s '.' tmp/exploitability_*.json > tmp/exploitability_analyses.json
          else
            echo '[]' > tmp/exploitability_analyses.json
          fi
          jq --slurpfile analyses tmp/exploitability_analyses.json '
            map(
              . as $advisory |
              ($analyses[0] | map(select(.cve_id == $advisory.id)) | first) as $analysis |
              if $analysis then
                $advisory + {
                  exploitability_score: $analysis.exploitability_score,
                  exploitability_rationale: $analysis.exploitability_rationale,
                  attack_vector_analysis: $analysis.attack_vector_analysis,
                  exploit_detection: $analysis.exploit_detection
                }
              else
                $advisory
              end
            )
          ' tmp/new_advisories.json > tmp/new_advisories_enriched.json
          # Replace original with enriched version
          mv tmp/new_advisories_enriched.json tmp/new_advisories.json
          echo "=== Exploitability analysis complete ==="
@@ -0,0 +1,263 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # Print the command-line help text for this helper on stdout.
 # The quoted heredoc delimiter keeps the text literal (no expansion).
 # The unknown-argument path in this script calls it with stdout redirected
 # to stderr.
 usage() {
  cat <<'EOF'
 Usage:
  scripts/ci/enrich_exploitability.sh --mode single|batch --input <path> --output <path> [--cvss-vectors <path>] [--analyzer <path>]
 Options:
  --mode           Processing mode: single advisory object or batch advisory array
  --input          Input JSON path
  --output         Output JSON path
  --cvss-vectors   Optional JSON object mapping advisory id -> CVSS vector
  --analyzer       Optional analyzer path (default: utils/analyze_exploitability.py)
  --help           Show this help
 EOF
 }
 # Resolve the repository root (two directories above this script) and make it
 # the working directory, so relative paths such as the default analyzer
 # location are resolved from the repo root rather than the caller's cwd.
 script_dir="$(dirname "${BASH_SOURCE[0]}")"
 REPO_ROOT="$(cd "${script_dir}/../.." && pwd)"
 cd "$REPO_ROOT"
 # Option defaults; parse_args overwrites them from the command line.
 MODE=""
 INPUT_PATH=""
 OUTPUT_PATH=""
 CVSS_VECTORS_PATH=""
 ANALYZER_PATH="utils/analyze_exploitability.py"
 # Parse command-line options into the globals above.
 # Globals:   MODE, INPUT_PATH, OUTPUT_PATH, CVSS_VECTORS_PATH, ANALYZER_PATH (written)
 # Arguments: the script's argument list ("$@")
 # Exits:     0 after printing help for --help/-h; 1 on an unknown argument or
 #            when a value-taking option is given without a value.
 parse_args() {
   while [[ $# -gt 0 ]]; do
     case "$1" in
       --mode | --input | --output | --cvss-vectors | --analyzer)
         # Without this check, `shift 2` on a trailing option fails and
         # `set -e` terminates the script with no diagnostic at all.
         if [[ $# -lt 2 ]]; then
           echo "ERROR: $1 requires a value" >&2
           usage >&2
           exit 1
         fi
         case "$1" in
           --mode) MODE="$2" ;;
           --input) INPUT_PATH="$2" ;;
           --output) OUTPUT_PATH="$2" ;;
           --cvss-vectors) CVSS_VECTORS_PATH="$2" ;;
           --analyzer) ANALYZER_PATH="$2" ;;
         esac
         shift 2
         ;;
       --help | -h)
         usage
         exit 0
         ;;
       *)
         echo "ERROR: Unknown argument: $1" >&2
         usage >&2
         exit 1
         ;;
     esac
   done
 }
 parse_args "$@"
 # Print the given message on stderr and terminate with status 1.
 abort_with() {
   echo "$1" >&2
   exit 1
 }
 # Validate options and required tools before doing any work.
 case "$MODE" in
   single | batch) ;;
   *) abort_with "ERROR: --mode must be one of: single, batch" ;;
 esac
 [[ -n "$INPUT_PATH" && -n "$OUTPUT_PATH" ]] \
   || abort_with "ERROR: --input and --output are required"
 [[ -f "$INPUT_PATH" ]] \
   || abort_with "ERROR: input file not found: $INPUT_PATH"
 [[ -f "$ANALYZER_PATH" ]] \
   || abort_with "ERROR: analyzer file not found: $ANALYZER_PATH"
 # The CVSS vector map is optional; only check it when one was supplied.
 if [[ -n "$CVSS_VECTORS_PATH" ]]; then
   [[ -f "$CVSS_VECTORS_PATH" ]] \
     || abort_with "ERROR: --cvss-vectors file not found: $CVSS_VECTORS_PATH"
 fi
 command -v jq >/dev/null 2>&1 || abort_with "ERROR: jq is required"
 # Pick the interpreter: `python` is preferred, `python3` is the fallback.
 PYTHON_BIN=""
 for python_candidate in python python3; do
   if command -v "$python_candidate" >/dev/null 2>&1; then
     PYTHON_BIN="$python_candidate"
     break
   fi
 done
 [[ -n "$PYTHON_BIN" ]] || abort_with "ERROR: python or python3 is required"
 # Scratch directory for intermediate JSON files; removed when the script exits.
 tmpdir="$(mktemp -d)"
 cleanup_tmpdir() {
   rm -rf "$tmpdir"
 }
 trap cleanup_tmpdir EXIT
 # Emit the CVSS vector to use for one advisory.
 # Globals:   CVSS_VECTORS_PATH (read) - optional JSON file mapping id -> vector
 # Arguments: $1 - advisory as a JSON object string
 # Outputs:   the vector on stdout (an empty line when none is found). With a
 #            map file configured only the map is consulted; otherwise the
 #            advisory's own .cvss_vector field is used.
 resolve_cvss_vector() {
   local advisory_json="$1"
   local lookup_id
   lookup_id="$(jq -r '.id // ""' <<<"$advisory_json")"
   if [[ -z "$CVSS_VECTORS_PATH" ]]; then
     jq -r '.cvss_vector // ""' <<<"$advisory_json"
     return
   fi
   jq -r --arg id "$lookup_id" '.[$id] // ""' "$CVSS_VECTORS_PATH"
 }
 # Map a severity label to an approximate CVSS score for the analyzer.
 # Matching is case-insensitive, so "HIGH" and "Critical" map like their
 # lowercase forms instead of silently falling through to the default.
 # Arguments: $1 - severity label (critical|high|medium|low), any letter case
 # Outputs:   the score on stdout; 5.0 for unknown or missing labels
 severity_to_cvss() {
   local severity
   # tr rather than ${1,,} so the script keeps working on bash 3.x.
   severity="$(printf '%s' "${1:-}" | tr '[:upper:]' '[:lower:]')"
   case "$severity" in
     critical) echo "9.5" ;;
     high) echo "7.5" ;;
     medium) echo "5.5" ;;
     low) echo "3.0" ;;
     *) echo "5.0" ;;
   esac
 }
 # Build the JSON document that is piped to the exploitability analyzer.
 # Arguments: $1 - advisory as a JSON object string
 #            $2 - mode: "single" derives the score from .severity via
 #                 severity_to_cvss; any other value reads .cvss_score (default 0)
 # Outputs:   JSON object on stdout with keys cve_id, cvss_score, cvss_vector,
 #            type, description, references
 build_analysis_input() {
   local advisory_json="$1"
   local mode="$2"
   local id kind summary refs vector score level
   id="$(jq -r '.id // ""' <<<"$advisory_json")"
   kind="$(jq -r '.type // ""' <<<"$advisory_json")"
   summary="$(jq -r '.description // ""' <<<"$advisory_json")"
   refs="$(jq -c '.references // []' <<<"$advisory_json")"
   vector="$(resolve_cvss_vector "$advisory_json")"
   case "$mode" in
     single)
       level="$(jq -r '.severity // "medium"' <<<"$advisory_json")"
       score="$(severity_to_cvss "$level")"
       ;;
     *)
       score="$(jq -r '.cvss_score // 0' <<<"$advisory_json")"
       ;;
   esac
   # score and refs are passed as JSON (--argjson); the rest as plain strings.
   jq -n \
     --arg cve_id "$id" \
     --argjson cvss_score "$score" \
     --arg cvss_vector "$vector" \
     --arg type "$kind" \
     --arg description "$summary" \
     --argjson references "$refs" \
     '{
       cve_id: $cve_id,
       cvss_score: $cvss_score,
       cvss_vector: $cvss_vector,
       type: $type,
       description: $description,
       references: $references
     }'
 }
 # Run the exploitability analyzer for one advisory.
 # Globals:   PYTHON_BIN, ANALYZER_PATH (read)
 # Arguments: $1 - advisory as a JSON object string
 #            $2 - mode passed through to build_analysis_input
 #            $3 - file that receives the analyzer's JSON output on success
 # Outputs:   on failure, a GitHub Actions "::warning::" line on stdout
 # Returns:   0 when the analyzer succeeded and printed a JSON object (written
 #            to $3); 1 otherwise, in which case $3 is not written.
 run_analysis() {
   local advisory_json="$1"
   local mode="$2"
   local output_file="$3"
   local advisory_id analysis_input analysis
   advisory_id="$(jq -r '.id // "unknown"' <<<"$advisory_json")"
   # Callers invoke this function inside `if`, where `set -e` is suspended,
   # so a failure to build the input has to be checked explicitly.
   if ! analysis_input="$(build_analysis_input "$advisory_json" "$mode")"; then
     echo "::warning::Failed to build analyzer input for $advisory_id, continuing without enrichment"
     return 1
   fi
   # Analyzer stderr is discarded; a failure is reported only through the
   # warning below.
   if ! analysis="$(printf '%s\n' "$analysis_input" \
     | "$PYTHON_BIN" "$ANALYZER_PATH" --json --check-exploits 2>/dev/null)"; then
     echo "::warning::Failed to analyze exploitability for $advisory_id, continuing without enrichment"
     return 1
   fi
   # Reject output that is not a JSON object: accepting it would make the later
   # jq merge fail and abort the whole run instead of skipping one advisory.
   if ! jq -e 'type == "object"' <<<"$analysis" >/dev/null 2>&1; then
     echo "::warning::Analyzer returned malformed output for $advisory_id, continuing without enrichment"
     return 1
   fi
   printf '%s\n' "$analysis" > "$output_file"
   return 0
 }
 # Enrich a single advisory object with exploitability fields.
 # Globals:   INPUT_PATH, OUTPUT_PATH, tmpdir (read)
 # Outputs:   a completion message on stdout; the result is written to
 #            OUTPUT_PATH (staged in tmpdir first, so input and output may be
 #            the same file)
 # Exits:     1 when INPUT_PATH does not hold a JSON object
 enrich_single() {
   jq -e 'type == "object"' "$INPUT_PATH" >/dev/null || {
     echo "ERROR: single mode expects JSON object at $INPUT_PATH" >&2
     exit 1
   }
   local advisory_text result_file staged_output
   advisory_text="$(<"$INPUT_PATH")"
   result_file="$tmpdir/analysis_single.json"
   staged_output="$tmpdir/output_single.json"
   if ! run_analysis "$advisory_text" "single" "$result_file"; then
     # Analysis failed: pass the advisory through unchanged.
     cp "$INPUT_PATH" "$staged_output"
   else
     jq --slurpfile analysis "$result_file" '
       $analysis[0] as $a
       | . + {
           exploitability_score: $a.exploitability_score,
           exploitability_rationale: $a.exploitability_rationale,
           attack_vector_analysis: $a.attack_vector_analysis,
           exploit_detection: $a.exploit_detection
         }
     ' "$INPUT_PATH" > "$staged_output"
   fi
   mv "$staged_output" "$OUTPUT_PATH"
   echo "Exploitability enrichment complete (single): $OUTPUT_PATH"
 }
 # Enrich every advisory in a JSON array with exploitability fields.
 # Each advisory is analysed separately; one that fails is left unchanged.
 # Analyses are matched back to advisories by analysis .cve_id == advisory .id.
 # Globals:   INPUT_PATH, OUTPUT_PATH, tmpdir (read)
 # Outputs:   a completion message and analysed/failed counts on stdout; the
 #            result array is written to OUTPUT_PATH (staged in tmpdir first)
 # Exits:     1 when INPUT_PATH does not hold a JSON array
 enrich_batch() {
   jq -e 'type == "array"' "$INPUT_PATH" >/dev/null || {
     echo "ERROR: batch mode expects JSON array at $INPUT_PATH" >&2
     exit 1
   }
   local ok_total=0 failed_total=0 position=0
   local entry result_file
   local merged_analyses="$tmpdir/analyses.json"
   local staged_output="$tmpdir/output_batch.json"
   # Process substitution keeps the loop in this shell so the counters survive.
   while IFS= read -r entry; do
     result_file="$tmpdir/analysis_${position}.json"
     if run_analysis "$entry" "batch" "$result_file"; then
       ok_total=$((ok_total + 1))
     else
       failed_total=$((failed_total + 1))
       rm -f "$result_file"
     fi
     position=$((position + 1))
   done < <(jq -c '.[]' "$INPUT_PATH")
   # An unmatched glob stays literal, so the first element only exists on disk
   # when at least one analysis file was produced.
   local -a result_files=("$tmpdir"/analysis_*.json)
   if [[ -e "${result_files[0]}" ]]; then
     jq -s '.' "${result_files[@]}" > "$merged_analyses"
   else
     echo '[]' > "$merged_analyses"
   fi
   jq --slurpfile analyses "$merged_analyses" '
     $analyses[0] as $all
     | map(
         . as $advisory
         | ($all | map(select(.cve_id == $advisory.id)) | first) as $match
         | if $match then
             $advisory + {
               exploitability_score: $match.exploitability_score,
               exploitability_rationale: $match.exploitability_rationale,
               attack_vector_analysis: $match.attack_vector_analysis,
               exploit_detection: $match.exploit_detection
             }
           else
             $advisory
           end
       )
   ' "$INPUT_PATH" > "$staged_output"
   mv "$staged_output" "$OUTPUT_PATH"
   echo "Exploitability enrichment complete (batch): $OUTPUT_PATH"
   echo "Analyzed: $ok_total, failed: $failed_total"
 }
 # Entry point: run the enrichment routine for the requested mode.
 # Any MODE other than "single" takes the batch path.
 case "$MODE" in
   single) enrich_single ;;
   *) enrich_batch ;;
 esac
@@ -18,7 +18,7 @@ source "$SCRIPT_DIR/feed-utils.sh"
 init_feed_paths "$PROJECT_ROOT"
 KEYWORDS="OpenClaw clawdbot Moltbot NanoClaw WhatsApp-bot baileys"
 GITHUB_REF_PATTERN="github.com/openclaw/openclaw github.com/qwibitai/NanoClaw"
-ANALYZER="$PROJECT_ROOT/utils/analyze_exploitability.py"
+ENRICH_SCRIPT="$PROJECT_ROOT/scripts/ci/enrich_exploitability.sh"
 # Parse args
 DAYS_BACK=120
@@ -47,19 +47,9 @@ echo "Days back: $DAYS_BACK"
 echo "Force mode: $FORCE"
 echo ""
-# Verify exploitability analyzer prerequisites
+# Verify enrichment helper exists (it validates Python/analyzer prerequisites internally).
-if ! command -v python3 &> /dev/null; then
+if [ ! -x "$ENRICH_SCRIPT" ]; then
-  echo "Error: python3 is required but not found in PATH"
+  echo "Error: Exploitability enrichment helper not found or not executable: $ENRICH_SCRIPT"
  exit 1
 fi
 if [ ! -f "$ANALYZER" ]; then
  echo "Error: Exploitability analyzer not found: $ANALYZER"
  exit 1
 fi
 if ! python3 "$ANALYZER" --help &> /dev/null; then
  echo "Error: Exploitability analyzer failed to run. Check Python environment."
  exit 1
 fi
@@ -331,70 +321,11 @@ jq '
  }] | map({(.id): .cvss_vector}) | add
 ' "$TEMP_DIR/filtered_cves.json" > "$TEMP_DIR/cvss_vectors.json"
-ANALYZED_COUNT=0
+"$ENRICH_SCRIPT" \
-FAILED_ANALYSIS=0
+  --mode batch \
-
+  --input "$TEMP_DIR/new_advisories.json" \
-while IFS= read -r advisory; do
+  --output "$TEMP_DIR/new_advisories.json" \
-  CVE_ID=$(echo "$advisory" | jq -r '.id')
+  --cvss-vectors "$TEMP_DIR/cvss_vectors.json"
  CVSS_SCORE=$(echo "$advisory" | jq -r '.cvss_score // 0')
  CVSS_VECTOR=$(jq -r --arg id "$CVE_ID" '.[$id] // ""' "$TEMP_DIR/cvss_vectors.json")
  VULN_TYPE=$(echo "$advisory" | jq -r '.type // ""')
  DESCRIPTION=$(echo "$advisory" | jq -r '.description // ""')
  REFERENCES=$(echo "$advisory" | jq -c '.references // []')
  INPUT_JSON=$(jq -n \
    --arg cve_id "$CVE_ID" \
    --argjson cvss_score "$CVSS_SCORE" \
    --arg cvss_vector "$CVSS_VECTOR" \
    --arg type "$VULN_TYPE" \
    --arg description "$DESCRIPTION" \
    --argjson references "$REFERENCES" \
    '{
      cve_id: $cve_id,
      cvss_score: $cvss_score,
      cvss_vector: $cvss_vector,
      type: $type,
      description: $description,
      references: $references
    }')
  if ANALYSIS=$(echo "$INPUT_JSON" | python3 "$ANALYZER" --json --check-exploits 2>/dev/null); then
    echo "$ANALYSIS" > "$TEMP_DIR/exploitability_${CVE_ID}.json"
    SCORE=$(echo "$ANALYSIS" | jq -r '.exploitability_score // "unknown"')
    echo "  ✓ $CVE_ID -> $SCORE"
    ANALYZED_COUNT=$((ANALYZED_COUNT + 1))
  else
    echo "  ⚠ $CVE_ID analysis failed; keeping null exploitability fields"
    FAILED_ANALYSIS=$((FAILED_ANALYSIS + 1))
  fi
 done < <(jq -c '.[]' "$TEMP_DIR/new_advisories.json")
 if ls "$TEMP_DIR"/exploitability_*.json >/dev/null 2>&1; then
  jq -s '.' "$TEMP_DIR"/exploitability_*.json > "$TEMP_DIR/exploitability_analyses.json"
 else
  echo '[]' > "$TEMP_DIR/exploitability_analyses.json"
 fi
 jq --slurpfile analyses "$TEMP_DIR/exploitability_analyses.json" '
  map(
    . as $advisory |
    ($analyses[0] | map(select(.cve_id == $advisory.id)) | first) as $analysis |
    if $analysis then
      $advisory + {
        exploitability_score: $analysis.exploitability_score,
        exploitability_rationale: $analysis.exploitability_rationale,
        attack_vector_analysis: $analysis.attack_vector_analysis,
        exploit_detection: $analysis.exploit_detection
      }
    else
      $advisory
    end
  )
 ' "$TEMP_DIR/new_advisories.json" > "$TEMP_DIR/new_advisories_enriched.json"
 mv "$TEMP_DIR/new_advisories_enriched.json" "$TEMP_DIR/new_advisories.json"
 echo "Exploitability analysis complete: $ANALYZED_COUNT analyzed, $FAILED_ANALYSIS failed"
 echo ""
 echo "=== New Advisories ==="