From a50966601db9e632f88cbcd9c8f6fdebb10bdf5c Mon Sep 17 00:00:00 2001 From: David Abutbul Date: Fri, 27 Feb 2026 21:49:08 +0200 Subject: [PATCH] auto-claude: subtask-6-3 - Write integration test for policy parsing workflow --- .../test/integration-policy.test.mjs | 600 ++++++++++++++++++ 1 file changed, 600 insertions(+) create mode 100755 skills/clawsec-analyst/test/integration-policy.test.mjs diff --git a/skills/clawsec-analyst/test/integration-policy.test.mjs b/skills/clawsec-analyst/test/integration-policy.test.mjs new file mode 100755 index 0000000..4f0059d --- /dev/null +++ b/skills/clawsec-analyst/test/integration-policy.test.mjs @@ -0,0 +1,600 @@ +#!/usr/bin/env node + +/** + * Integration test for policy parsing workflow in clawsec-analyst. + * + * Tests cover: + * - End-to-end policy parsing workflow (NL input -> Claude API -> structured policy) + * - Multiple policies batch processing with different confidence levels + * - Policy validation workflow with suggestions + * - Low confidence handling and rejection + * - Error resilience with fallback + * - Policy formatting and display output + * - Complete integration of policy-engine with Claude API client + * + * Run: ANTHROPIC_API_KEY=test node skills/clawsec-analyst/test/integration-policy.test.mjs + */ + +import { fileURLToPath } from "node:url"; +import path from "node:path"; +import { + pass, + fail, + report, + exitWithResults, +} from "./lib/test_harness.mjs"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const LIB_PATH = path.resolve(__dirname, "..", "lib"); + +// Set NODE_ENV to test to suppress console warnings during tests +process.env.NODE_ENV = "test"; + +// Dynamic import to ensure we test the actual compiled modules +const { + parsePolicy, + parsePolicies, + validatePolicyStatement, + formatPolicyResult, + getConfidenceThreshold, +} = await import(`${LIB_PATH}/policy-engine.js`); + +// ----------------------------------------------------------------------------- +// Mock Claude Client +// ----------------------------------------------------------------------------- + +class MockClaudeClient { + constructor() { + this._responseFn = null; + this._callCount = 0; + this._shouldFail = false; + } + + /** + * Set response function for controlled responses + */ + setResponseFn(fn) { + this._responseFn = fn; + return this; + } + + /** + * Configure client to fail all requests + */ + setShouldFail(shouldFail) { + this._shouldFail = shouldFail; + return this; + } + + /** + * Mock parsePolicy implementation + */ + async parsePolicy(nlPolicy) { + this._callCount++; + + if (this._shouldFail) { + throw new Error("Mock Claude API unavailable"); + } + + if (!this._responseFn) { + throw new Error("No response function configured"); + } + + return this._responseFn(nlPolicy, this._callCount); + } + + getCallCount() { + return this._callCount; + } +} + +// ----------------------------------------------------------------------------- +// Test Helpers +// ----------------------------------------------------------------------------- + +/** + * Create a valid policy response with high confidence + */ +function createValidPolicyResponse(overrides = {}) { + const defaults = { + policy: { + type: "advisory-severity", + condition: { + operator: "equals", + field: "severity", + value: "critical", + }, + action: "block", + description: "Block critical severity advisories", + }, + confidence: 0.95, + ambiguities: [], + }; + + return JSON.stringify({ + ...defaults, + ...overrides, + policy: { + ...defaults.policy, + ...(overrides.policy || {}), + condition: { + ...defaults.policy.condition, + ...(overrides.policy?.condition || {}), + }, + }, + }); +} + +/** + * Create a low-confidence response + */ +function createLowConfidenceResponse(ambiguities = []) { + return JSON.stringify({ + policy: { + type: "custom", + condition: { + operator: "equals", + field: "unknown", + value: "something", + }, + action: "log", + description: "Ambiguous policy", + }, + confidence: 0.3, + ambiguities: ambiguities.length > 0 + ? ambiguities + : ["Policy statement is too vague", "Unable to determine specific action"], + }); +} + +/** + * Create a response based on the NL input + */ +function createContextualResponse(nlPolicy) { + // Simulate contextual responses based on input + if (nlPolicy.toLowerCase().includes("critical") && nlPolicy.toLowerCase().includes("block")) { + return createValidPolicyResponse({ + policy: { + type: "advisory-severity", + condition: { + operator: "equals", + field: "severity", + value: "critical", + }, + action: "block", + description: "Block critical severity advisories", + }, + confidence: 0.95, + }); + } + + if (nlPolicy.toLowerCase().includes("high") && nlPolicy.toLowerCase().includes("warn")) { + return createValidPolicyResponse({ + policy: { + type: "advisory-severity", + condition: { + operator: "equals", + field: "severity", + value: "high", + }, + action: "warn", + description: "Warn on high severity advisories", + }, + confidence: 0.88, + }); + } + + if (nlPolicy.toLowerCase().includes("risk") && nlPolicy.toLowerCase().includes("score")) { + return createValidPolicyResponse({ + policy: { + type: "risk-score", + condition: { + operator: "greater_than", + field: "riskScore", + value: 75, + }, + action: "require_approval", + description: "Require approval for risk scores above 75", + }, + confidence: 0.92, + }); + } + + // Default to low confidence + return createLowConfidenceResponse(); +} + +// ----------------------------------------------------------------------------- +// Test: Complete policy parsing workflow - NL to structured policy +// ----------------------------------------------------------------------------- +async function testCompletePolicyParsingWorkflow() { + const testName = "Complete policy parsing workflow: NL input -> Claude -> structured policy"; + + try { + const nlPolicy = "Block all critical severity advisories"; + + const client = new MockClaudeClient(); + client.setResponseFn((input) => { + if (input === nlPolicy) { + return createValidPolicyResponse({ + confidence: 0.95, + }); + } + return createLowConfidenceResponse(); + }); + + // Parse the policy + const result = await parsePolicy(nlPolicy, client); + + // Verify the complete workflow + if (!result.policy) { + fail(testName, "Expected policy to be defined"); + return; + } + + if ( + result.policy.type === "advisory-severity" && + result.policy.condition.operator === "equals" && + result.policy.condition.field === "severity" && + result.policy.condition.value === "critical" && + result.policy.action === "block" && + result.policy.id && + result.policy.id.startsWith("policy-") && + result.policy.createdAt && + result.confidence === 0.95 && + client.getCallCount() === 1 + ) { + pass(testName); + } else { + fail(testName, `Unexpected result: ${JSON.stringify(result)}`); + } + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Test: Batch processing multiple policies with different confidence levels +// ----------------------------------------------------------------------------- +async function testBatchPolicyProcessing() { + const testName = "Batch processing: multiple policies with different confidence levels"; + + try { + const nlPolicies = [ + "Block all critical severity advisories", + "Warn on high severity advisories", + "Require approval for risk scores above 75", + "Do something vague", // This should have low confidence + ]; + + const client = new MockClaudeClient(); + client.setResponseFn(createContextualResponse); + + // Parse all policies + const results = await parsePolicies(nlPolicies, client); + + // Verify batch results + if (results.length !== 4) { + fail(testName, `Expected 4 results, got ${results.length}`); + return; + } + + // First three should succeed with high confidence + const successCount = results.filter((r) => r.policy !== null).length; + const lowConfidenceCount = results.filter((r) => r.confidence < getConfidenceThreshold()).length; + + if ( + successCount === 3 && + lowConfidenceCount === 1 && + results[0].policy?.type === "advisory-severity" && + results[1].policy?.type === "advisory-severity" && + results[2].policy?.type === "risk-score" && + results[3].policy === null && + client.getCallCount() === 4 + ) { + pass(testName); + } else { + fail( + testName, + `Expected 3 successes and 1 low confidence, got ${successCount} successes, ${lowConfidenceCount} low confidence. Results: ${JSON.stringify(results.map(r => ({ type: r.policy?.type, conf: r.confidence })))}` + ); + } + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Test: Policy validation workflow with suggestions +// ----------------------------------------------------------------------------- +async function testPolicyValidationWorkflow() { + const testName = "Policy validation workflow: provides suggestions for improvement"; + + try { + const validPolicy = "Block all critical severity advisories"; + const ambiguousPolicy = "Do something risky"; + + const client = new MockClaudeClient(); + client.setResponseFn((input) => { + if (input === validPolicy) { + return createValidPolicyResponse({ confidence: 0.95 }); + } + return createLowConfidenceResponse([ + "The term 'risky' is not specific enough", + "No clear action specified", + ]); + }); + + // Validate valid policy + const validResult = await validatePolicyStatement(validPolicy, client); + + if (!validResult.valid) { + fail(testName, "Expected valid policy to be marked as valid"); + return; + } + + // Validate ambiguous policy + const ambiguousResult = await validatePolicyStatement(ambiguousPolicy, client); + + if ( + validResult.valid === true && + validResult.suggestions.length === 0 && + ambiguousResult.valid === false && + ambiguousResult.suggestions.length > 0 && + ambiguousResult.suggestions.some(s => s.includes("specific")) + ) { + pass(testName); + } else { + fail( + testName, + `Expected validation workflow to work correctly. Valid: ${JSON.stringify(validResult)}, Ambiguous: ${JSON.stringify(ambiguousResult)}` + ); + } + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Test: Low confidence handling and rejection +// ----------------------------------------------------------------------------- +async function testLowConfidenceHandling() { + const testName = "Low confidence handling: rejects ambiguous policies"; + + try { + const ambiguousPolicy = "Maybe block some stuff"; + + const client = new MockClaudeClient(); + client.setResponseFn(() => createLowConfidenceResponse([ + "Policy is too ambiguous", + "No clear condition or action", + ])); + + const result = await parsePolicy(ambiguousPolicy, client); + + // Result should have null policy and low confidence + if ( + result.policy === null && + result.confidence < getConfidenceThreshold() && + result.ambiguities.length > 0 && + result.ambiguities[0].includes("ambiguous") + ) { + pass(testName); + } else { + fail(testName, `Expected null policy with low confidence, got: ${JSON.stringify(result)}`); + } + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Test: Error resilience with Claude API failure +// ----------------------------------------------------------------------------- +async function testErrorResilience() { + const testName = "Error resilience: handles Claude API failures gracefully"; + + try { + const nlPolicy = "Block critical advisories"; + + const client = new MockClaudeClient(); + client.setShouldFail(true); + + // Attempt to parse - should throw + try { + await parsePolicy(nlPolicy, client); + fail(testName, "Expected error when Claude API fails"); + } catch (error) { + if (error.code === "CLAUDE_API_ERROR" && error.message.includes("Failed to parse policy")) { + pass(testName); + } else { + fail(testName, `Expected CLAUDE_API_ERROR, got: ${error.message}`); + } + } + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Test: Policy formatting and display output +// ----------------------------------------------------------------------------- +async function testPolicyFormatting() { + const testName = "Policy formatting: generates human-readable output"; + + try { + const nlPolicy = "Block all critical severity advisories"; + + const client = new MockClaudeClient(); + client.setResponseFn(() => createValidPolicyResponse({ + confidence: 0.92, + ambiguities: ["Minor: could specify time window"], + })); + + const result = await parsePolicy(nlPolicy, client); + + // Format the result + const formatted = formatPolicyResult(result); + + // Verify formatting includes key elements + if ( + formatted.includes("Policy Parse Result") && + formatted.includes("Confidence: 92.0%") && + formatted.includes("Structured Policy") && + formatted.includes("Type: advisory-severity") && + formatted.includes("Action: block") && + formatted.includes("Condition:") && + formatted.includes("Field: severity") && + formatted.includes("Ambiguities:") && + formatted.includes("Minor: could specify time window") + ) { + pass(testName); + } else { + fail(testName, `Unexpected formatting: ${formatted}`); + } + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Test: Complete integration with all policy types +// ----------------------------------------------------------------------------- +async function testComprehensivePolicyTypes() { + const testName = "Comprehensive policy types: supports all policy type workflows"; + + try { + const policyTypes = [ + { + nl: "Block critical advisories", + type: "advisory-severity", + action: "block", + }, + { + nl: "Prevent access to /etc/passwd", + type: "filesystem-access", + action: "block", + }, + { + nl: "Warn about connections to untrusted domains", + type: "network-access", + action: "warn", + }, + { + nl: "Require approval for vulnerabilities with CVSS > 7", + type: "dependency-vulnerability", + action: "require_approval", + }, + { + nl: "Block installations with risk score above 80", + type: "risk-score", + action: "block", + }, + ]; + + const client = new MockClaudeClient(); + client.setResponseFn((input, callCount) => { + const policy = policyTypes[callCount - 1]; + return createValidPolicyResponse({ + policy: { + type: policy.type, + condition: { + operator: "equals", + field: "test", + value: "test", + }, + action: policy.action, + description: input, + }, + confidence: 0.90, + }); + }); + + const results = await parsePolicies( + policyTypes.map(p => p.nl), + client + ); + + // Verify all policies were parsed with correct types + const allValid = results.every((r, idx) => { + return ( + r.policy !== null && + r.policy.type === policyTypes[idx].type && + r.policy.action === policyTypes[idx].action && + r.confidence >= 0.90 + ); + }); + + if (allValid && results.length === 5) { + pass(testName); + } else { + fail( + testName, + `Expected all 5 policy types to parse successfully, got: ${JSON.stringify(results.map(r => ({ type: r.policy?.type, action: r.policy?.action })))}` + ); + } + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Test: Input validation edge cases +// ----------------------------------------------------------------------------- +async function testInputValidation() { + const testName = "Input validation: handles edge cases (empty, too short)"; + + try { + const client = new MockClaudeClient(); + client.setResponseFn(() => createValidPolicyResponse()); + + // Test empty string + try { + await parsePolicy("", client); + fail(testName, "Expected error for empty policy"); + return; + } catch (error) { + if (!error.message.includes("cannot be empty")) { + fail(testName, `Expected 'cannot be empty' error, got: ${error.message}`); + return; + } + } + + // Test too short string + try { + await parsePolicy("block", client); + fail(testName, "Expected error for too short policy"); + return; + } catch (error) { + if (!error.message.includes("too short")) { + fail(testName, `Expected 'too short' error, got: ${error.message}`); + return; + } + } + + pass(testName); + } catch (error) { + fail(testName, error); + } +} + +// ----------------------------------------------------------------------------- +// Run all tests +// ----------------------------------------------------------------------------- +async function runAllTests() { + console.log("=== Integration Test: Policy Parsing Workflow ===\n"); + + await testCompletePolicyParsingWorkflow(); + await testBatchPolicyProcessing(); + await testPolicyValidationWorkflow(); + await testLowConfidenceHandling(); + await testErrorResilience(); + await testPolicyFormatting(); + await testComprehensivePolicyTypes(); + await testInputValidation(); + + report(); + exitWithResults(); +} + +runAllTests();