Error Handling

Happen implements error handling and robust recovery patterns without introducing any new primitives beyond Nodes and Events.

Core Principles

Happen's approach to error handling is built on these fundamental principles:

  1. Errors as Flow Branches: Errors are treated as natural branches in the event flow

  2. Functional Error Handling: Error handlers are ordinary functions that a handler returns to continue the flow

  3. Context for Error Information: Error details flow through context between functions

  4. Causal Error Tracking: Errors maintain causal relationships like normal events

  5. Decentralized Recovery: Nodes make local recovery decisions when possible

This approach achieves remarkable power with minimal complexity by leveraging existing primitives rather than introducing special error-handling constructs.

The Functional Error Model

In Happen, error handling is fully integrated into the Event Continuum through function returns:

// Register the entry handler for "process-order".
// The handler validates the payload and returns the next function to run.
orderNode.on("process-order", function validateOrder(event, context) {
  const { valid } = validateOrderData(event.payload);

  // Happy path: flag the context, then hand off to order processing
  if (valid) {
    context.validated = true;
    return processOrder;
  }

  // Failure path: the error branch is just another returned function
  return handleInvalidOrder;
});

// Error branch for orders that failed validation.
// Logs the rejected payload, then ends the flow with a failure result.
function handleInvalidOrder(event, context) {
  const rejectedPayload = event.payload;
  logValidationFailure(rejectedPayload);

  const failure = {
    success: false,
    reason: "validation-failed",
    details: "Invalid order data"
  };

  return failure;
}

This functional approach means:

  1. Errors are just another branch in the flow

  2. Error handlers are regular functions

  3. Error information flows through context

  4. No special syntax or constructs are needed

Error Propagation and Context

Error information naturally flows through context:

// Charges the customer and picks the next step in the flow.
//
// Returns:
//   - createShipment         when the charge succeeds
//   - handlePaymentFailure   when the charge reports success === false
//   - handleUnexpectedError  when anything inside the try block throws
//
// On either failure path a structured record is stored in context.error so
// the returned handler can read it.
function processPayment(event, context) {
  // Resolve the correlation id once, without throwing. Reading it inline
  // inside the try block would turn a missing causal context into a
  // TypeError, which would then misreport a declined payment as an
  // "unexpected" error. Resolving it here also lets the unexpected-error
  // record carry the same id.
  const causal = event && event.context && event.context.causal;
  const correlationId = causal ? causal.correlationId : undefined;

  try {
    // Process the payment
    const paymentResult = chargeCustomer(event.payload.payment);

    if (!paymentResult.success) {
      // Store error details in context
      context.error = {
        code: paymentResult.code,
        message: paymentResult.message,
        timestamp: Date.now(),
        correlationId
      };

      return handlePaymentFailure;
    }

    // Success path
    return createShipment;
  } catch (error) {
    // Unexpected error - store and return handler
    context.error = {
      unexpected: true,
      message: error.message,
      stack: error.stack,
      timestamp: Date.now(),
      correlationId
    };

    return handleUnexpectedError;
  }
}

// Error branch for a failed payment.
// Reads the error record stored in context.error, logs it, and ends the flow
// with a failure result that carries the error's message and code.
function handlePaymentFailure(event, context) {
  const failure = context.error;

  // Log structured error
  logError("Payment failed", failure);

  const response = {
    success: false,
    reason: "payment-failed",
    details: failure.message,
    code: failure.code
  };

  return response;
}

This approach allows error information to flow naturally through the event chain, maintaining complete context for diagnostics and recovery.

Recovery Patterns

Happen's functional approach enables powerful recovery patterns through composition.

Retry Pattern

// Attempts a payment and retries by returning itself on failure.
//
// The attempt counter lives in context.retryCount, so it survives across the
// repeated invocations. Returns:
//   - createShipment        when the charge succeeds (result in context.payment)
//   - processPayment        when the charge threw and attempts remain
//   - handlePaymentFailure  once the attempt limit is reached
function processPayment(event, context) {
  // Total attempts allowed before giving up
  const MAX_ATTEMPTS = 3;

  // Initialize retry count
  context.retryCount = context.retryCount || 0;

  try {
    // Attempt payment processing
    const result = chargeCustomer(event.payload.payment);

    // Success - continue to shipping
    context.payment = result;
    return createShipment;
  } catch (error) {
    // Store error
    context.lastError = error;

    // Increment retry count
    context.retryCount++;

    // Determine if we should retry
    if (context.retryCount < MAX_ATTEMPTS) {
      console.log(`Retrying payment (${context.retryCount}/${MAX_ATTEMPTS})...`);

      // Return self to retry
      return processPayment;
    }

    // Too many retries. The failure handler reads the error from
    // context.error, so publish the final error there as well; otherwise the
    // handler would find nothing to report.
    context.error = error;
    return handlePaymentFailure;
  }
}

Circuit Breaker Pattern

// Shared circuit state (could be in external store)
const circuits = {
  payment: {
    failures: 0,
    status: "closed", // closed, open, half-open
    lastFailure: null
  }
};

// Number of recorded failures at which a closed circuit opens
const CIRCUIT_FAILURE_THRESHOLD = 5;

// How long an open circuit waits before allowing a trial request (30 seconds)
const CIRCUIT_RESET_TIMEOUT_MS = 30000;

// Opens the circuit and schedules its transition to half-open.
function openCircuit(circuit) {
  circuit.status = "open";

  // Schedule reset to half-open
  setTimeout(() => {
    circuit.status = "half-open";
  }, CIRCUIT_RESET_TIMEOUT_MS);
}

// Processes a payment behind a circuit breaker.
//
// - open:      the call is rejected immediately with "service-unavailable"
// - closed:    the call is attempted; reaching the failure threshold opens
//              the circuit
// - half-open: the call is attempted as a trial; success closes the circuit
//              and resets the failure count, failure re-opens it
//
// Returns createShipment on success (charge result in context.payment),
// otherwise a failure result object.
function processPayment(event, context) {
  const circuit = circuits.payment;

  // Check if circuit is open
  if (circuit.status === "open") {
    return {
      success: false,
      reason: "service-unavailable",
      message: "Payment service is temporarily unavailable"
    };
  }

  try {
    // Process payment
    const result = chargeCustomer(event.payload.payment);

    // Success - reset circuit if in half-open state
    if (circuit.status === "half-open") {
      circuit.status = "closed";
      circuit.failures = 0;
    }

    // Continue with success
    context.payment = result;
    return createShipment;
  } catch (error) {
    // Update circuit state
    circuit.failures++;
    circuit.lastFailure = Date.now();

    // A failed trial request must re-open the circuit. Without this the
    // breaker would stay half-open forever and let every call through to a
    // service that is still failing.
    const trialFailed = circuit.status === "half-open";
    const thresholdReached =
      circuit.status === "closed" &&
      circuit.failures >= CIRCUIT_FAILURE_THRESHOLD;

    if (trialFailed || thresholdReached) {
      openCircuit(circuit);
    }

    // Return error result
    return {
      success: false,
      reason: "payment-failed",
      message: error.message,
      circuitStatus: circuit.status
    };
  }
}

Fallback Pattern

// Charges through the primary processor and falls back to the backup
// processor step if anything in the attempt throws.
function processPayment(event, context) {
  try {
    // Primary processor first; its result is kept for the shipment step
    const { payment } = event.payload;
    context.payment = primaryPaymentProcessor.charge(payment);
    return createShipment;
  } catch (primaryFailure) {
    // Remember why the primary failed so the fallback can report it
    context.primaryError = primaryFailure;

    // Hand off to the backup processor
    return useBackupPaymentProcessor;
  }
}

// Fallback step: charges through the backup processor after the primary
// failed. On success it rejoins the normal flow; if the backup also throws
// it ends the flow with a result that reports both error messages.
function useBackupPaymentProcessor(event, context) {
  console.log("Primary payment processor failed. Using backup.");

  try {
    // Charge via the backup and mark that the fallback was used
    context.payment = backupPaymentProcessor.charge(event.payload.payment);
    context.usedFallback = true;

    // Success - continue normal flow
    return createShipment;
  } catch (backupFailure) {
    // Both processors failed
    const { primaryError } = context;

    return {
      success: false,
      reason: "payment-failed",
      message: "Both primary and backup payment processors failed",
      primaryError: primaryError.message,
      backupError: backupFailure.message
    };
  }
}

Error Events as First-Class Citizens

In Happen, errors can also be treated as normal events, enabling system-wide error handling:

// Handle "process-payment": reply with the transaction id on success.
// On failure, broadcast a "payment.error" event before replying to the sender.
paymentNode.on("process-payment", (event, context) => {
  try {
    // Process payment
    const { id: transactionId } = processPayment(event.payload);
    return { success: true, transactionId };
  } catch (error) {
    // Describe the failure as an event that other nodes can observe
    const errorEvent = {
      type: "payment.error",
      payload: {
        orderId: event.payload.orderId,
        error: {
          message: error.message,
          code: error.code || "UNKNOWN"
        },
        timestamp: Date.now()
      }
    };
    paymentNode.broadcast(errorEvent);

    // Return failure to the original sender
    return {
      success: false,
      reason: "payment-error",
      message: error.message
    };
  }
});

// Monitoring node: matches every event whose type ends with ".error"
monitoringNode.on(
  (type) => type.endsWith(".error"),
  (event) => {
    // The service name is the first dot-separated segment of the event type
    const [service] = event.type.split(".");

    // Record the error, then check thresholds for alerts
    recordServiceError(service, event.payload);
    checkErrorThresholds(service);

    return { monitored: true };
  }
);

Distributed Error Handling

When errors cross node boundaries, they naturally maintain their causal context:

// Node A: runs the task and converts a thrown error into a failure result
nodeA.on("process-task", async (event) => {
  try {
    // Process task
    const taskOutput = await processTask(event.payload);
    return { success: true, result: taskOutput };
  } catch (taskError) {
    // Reply with a plain failure result instead of rejecting
    const failure = {
      success: false,
      reason: "task-failed",
      error: taskError.message,
      timestamp: Date.now()
    };
    return failure;
  }
});

// Node B: delegates the task to Node A and branches on the reply
nodeB.on("orchestrate-workflow", async (event) => {
  // Call Node A and wait for its reply
  const taskRequest = {
    type: "process-task",
    payload: event.payload.taskData
  };
  const result = await nodeB.send(nodeA, taskRequest).return();

  // Success path first
  if (result.success) {
    return continueWorkflow;
  }

  // The failure reported by Node A is now available here in Node B.
  // Handle it based on its reason.
  if (result.reason === "task-failed") {
    return handleTaskFailure;
  }

  // Any other failure ends the workflow
  return {
    success: false,
    reason: "workflow-failed",
    cause: result.reason,
    error: result.error
  };
});

Supervisor Pattern

For system-wide resilience, you can create supervisor nodes that monitor and manage error recovery:

// Create a supervisor node
const supervisorNode = createNode("system-supervisor");

// Count ".error" events per service and broadcast a restart request once a
// service has accumulated five errors.
supervisorNode.on(
  (type) => type.endsWith(".error"),
  (event) => {
    // The service name is the first dot-separated segment of the event type
    const [service] = event.type.split(".");

    // State updater: add one error for this service and stamp the time
    const countError = (state) => {
      const knownServices = state.services || {};
      const previous = knownServices[service] || { errors: 0, lastError: 0 };
      const updated = {
        ...previous,
        errors: previous.errors + 1,
        lastError: Date.now()
      };

      return {
        ...state,
        services: { ...knownServices, [service]: updated }
      };
    };

    // State updater: zero the error count and stamp the restart time
    const clearErrors = (state) => ({
      ...state,
      services: {
        ...state.services,
        [service]: {
          ...state.services[service],
          errors: 0,
          lastRestart: Date.now()
        }
      }
    });

    // Track error frequency
    supervisorNode.state.set(countError);

    // Read back the current count for this service
    const current = supervisorNode.state.get(
      (state) => (state.services && state.services[service]) || { errors: 0 }
    );

    if (current.errors >= 5) {
      // Too many errors - trigger restart
      supervisorNode.broadcast({
        type: "service.restart",
        payload: {
          service,
          reason: "excessive-errors",
          count: current.errors
        }
      });

      // Reset error count
      supervisorNode.state.set(clearErrors);
    }

    return { monitored: true };
  }
);

Composing Error Handling with Normal Flow

Error handling in Happen integrates seamlessly with normal event processing:

// Complete order processing flow with error handling.
// validateOrder is the entry handler for "process-order"; each step below
// returns either the next step function or a final result object.
orderNode.on("process-order", validateOrder);

// Step 1: validate the incoming order.
// Valid orders are copied into context.validatedOrder (with a validation
// flag and timestamp) and continue to the inventory check. Invalid orders
// branch to handleInvalidOrder.
function validateOrder(event, context) {
  const order = event.payload;

  if (validateOrderData(order).valid) {
    // Store validation result
    context.validatedOrder = {
      ...order,
      validated: true,
      validatedAt: Date.now()
    };

    // Continue to inventory check
    return checkInventory;
  }

  return handleInvalidOrder;
}

// Step 2: check stock for the validated order's items.
// Continues to payment when the items are available. Otherwise stores the
// inventory result in context.inventoryIssue and branches to the shortage
// handler.
function checkInventory(event, context) {
  const stock = checkInventoryLevels(context.validatedOrder.items);

  if (stock.available) {
    // Continue to payment
    return processPayment;
  }

  // Store inventory problem
  context.inventoryIssue = stock;
  return handleInventoryShortage;
}

// Step 3: charge the customer for the validated order.
// Success stores the charge in context.payment and continues to shipment.
// A charge with success === false is stored in context.paymentIssue and
// branches to handlePaymentFailure. Anything thrown is stored in
// context.error and branches to handleUnexpectedError.
function processPayment(event, context) {
  try {
    const { payment, items } = context.validatedOrder;
    const charge = chargeCustomer(payment, calculateTotal(items));

    if (charge.success) {
      // Store payment result and continue to shipment creation
      context.payment = charge;
      return createShipment;
    }

    context.paymentIssue = charge;
    return handlePaymentFailure;
  } catch (unexpected) {
    context.error = unexpected;
    return handleUnexpectedError;
  }
}

// Step 4: create the shipment record from the validated order and payment,
// then end the flow with a success result.
function createShipment(event, context) {
  const { validatedOrder: order, payment } = context;
  const shipment = createShipmentRecord({ order, payment });

  // Return success result
  const summary = {
    success: true,
    orderId: context.validatedOrder.id,
    shipmentId: shipment.id,
    trackingNumber: shipment.trackingNumber
  };
  return summary;
}

// Error handlers
// Ends the flow for an order that failed validation.
// Neither argument is read; the result is always the same.
function handleInvalidOrder(event, context) {
  const reason = "validation-failed";
  const details = "Invalid order data";

  return { success: false, reason, details };
}

// Ends the flow when items are out of stock.
// Reports the missing items recorded in context.inventoryIssue together
// with an availability estimate for them.
function handleInventoryShortage(event, context) {
  const unavailableItems = context.inventoryIssue.missingItems;

  // Try to get availability estimate
  const estimatedAvailability = estimateAvailability(unavailableItems);

  return {
    success: false,
    reason: "inventory-shortage",
    unavailableItems,
    estimatedAvailability
  };
}

// Ends the flow for a declined payment.
// Reports the message and code recorded in context.paymentIssue.
function handlePaymentFailure(event, context) {
  const { message, code } = context.paymentIssue;

  return {
    success: false,
    reason: "payment-failed",
    details: message,
    code
  };
}

// Ends the flow after an unexpected error.
// Logs the error stored in context.error and returns a generic failure
// result that does not include the error's details.
function handleUnexpectedError(event, context) {
  logError("Unexpected error processing order", context.error);

  const response = {
    success: false,
    reason: "system-error",
    message: "An unexpected error occurred"
  };
  return response;
}

Treating errors as branches in the functional flow provides sophisticated error handling capabilities without introducing special constructs.

Key takeaways:

  1. Use Function Returns for Error Flow: Return error handler functions to handle errors

  2. Leverage Context: Store error information in the context object for diagnostics

  3. Implement Recovery Patterns: Build error recovery using function composition

  4. Apply Resilience Patterns: Implement retry, circuit breakers, and fallbacks as needed

  5. Decentralized Recovery: Let nodes make local decisions about error handling

Last updated