Monitoring and Debugging Chains
Production chains need observability. This lesson covers strategies for monitoring chain health and debugging issues.
Why Chains Need Special Monitoring
Chains are harder to debug than single prompts:
- Errors can occur at any step
- Issues may compound across steps
- Root cause often isn't in the failing step
- Context degrades in ways that aren't immediately visible
Observability Pillars
1. Logging
class ChainLogger {
  // Assumes generateRunId, countTokens, and truncate helpers exist elsewhere
  constructor(chainId) {
    this.chainId = chainId;
    this.runId = generateRunId(); // unique ID for this particular run of the chain
    this.logs = [];
  }

  log(level, step, message, data = {}) {
    const entry = {
      timestamp: new Date().toISOString(),
      chainId: this.chainId,
      runId: this.runId,
      level,
      step,
      message,
      data
    };
    this.logs.push(entry);
    // Also send to logging service; errors additionally go to alerting
    if (level === 'error') {
      this.sendToAlertSystem(entry);
    }
  }

  logStepStart(step, input) {
    this.log('info', step, 'Step started', {
      inputTokens: countTokens(input),
      inputPreview: truncate(input, 200)
    });
  }

  logStepComplete(step, output, duration) {
    this.log('info', step, 'Step completed', {
      outputTokens: countTokens(output),
      outputPreview: truncate(output, 200),
      durationMs: duration
    });
  }

  logStepError(step, error, input) {
    this.log('error', step, 'Step failed', {
      error: error.message,
      stack: error.stack,
      inputPreview: truncate(input, 500)
    });
  }

  // Placeholder: wire this up to your alerting backend
  sendToAlertSystem(entry) {
    // e.g. POST the entry to an alerting webhook
  }
}
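In practice every step call goes through the logger. A minimal wrapper sketch, assuming the helpers above exist; runStep and summarizeDocument are hypothetical names for illustration:

const logger = new ChainLogger('document-processor');

async function runStep(name, fn, input) {
  logger.logStepStart(name, input);
  const start = Date.now();
  try {
    const output = await fn(input);
    logger.logStepComplete(name, output, Date.now() - start);
    return output;
  } catch (error) {
    logger.logStepError(name, error, input);
    throw error;
  }
}

// e.g. const summary = await runStep('summarize', summarizeDocument, rawText);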
2. Tracing
Implementing Distributed Tracing
class Tracer {
  // Assumes generateTraceId and generateSpanId helpers exist elsewhere
  constructor() {
    this.traces = new Map();
  }

  startTrace(chainId) {
    const traceId = generateTraceId();
    this.traces.set(traceId, {
      traceId,
      chainId,
      spans: [],
      startTime: Date.now()
    });
    return traceId;
  }

  startSpan(traceId, name, parentSpanId = null) {
    const spanId = generateSpanId();
    const trace = this.traces.get(traceId);
    trace.spans.push({
      spanId,
      parentSpanId,
      name,
      startTime: Date.now(),
      endTime: null,
      status: 'running',
      attributes: {},
      events: []
    });
    return spanId;
  }

  // Look up a span within a trace by its ID
  getSpan(traceId, spanId) {
    return this.traces.get(traceId).spans.find(span => span.spanId === spanId);
  }

  addEvent(traceId, spanId, name, attributes = {}) {
    const span = this.getSpan(traceId, spanId);
    span.events.push({
      name,
      timestamp: Date.now(),
      attributes
    });
  }

  endSpan(traceId, spanId, status = 'ok', error = null) {
    const span = this.getSpan(traceId, spanId);
    span.endTime = Date.now();
    span.status = status;
    if (error) {
      span.error = {
        message: error.message,
        stack: error.stack
      };
    }
  }

  getTrace(traceId) {
    return this.traces.get(traceId);
  }
}
// Usage
async function tracedChain(input) {
  const tracer = new Tracer();
  const traceId = tracer.startTrace('document-processor');
  let currentSpan = null;
  try {
    currentSpan = tracer.startSpan(traceId, 'validation');
    const validated = await validate(input);
    tracer.endSpan(traceId, currentSpan);

    currentSpan = tracer.startSpan(traceId, 'processing');
    const result = await process(validated);
    tracer.endSpan(traceId, currentSpan);

    return result;
  } catch (error) {
    // Close whichever span was active when the error occurred
    if (currentSpan) {
      tracer.endSpan(traceId, currentSpan, 'error', error);
    }
    throw error;
  }
}
3. Metrics
class ChainMetrics {
  constructor() {
    this.counters = {};
    this.histograms = {};
    this.gauges = {};
  }

  // Build a stable key from the metric name and its labels
  makeKey(name, labels = {}) {
    const labelString = Object.entries(labels)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([k, v]) => `${k}=${v}`)
      .join(',');
    return labelString ? `${name}{${labelString}}` : name;
  }

  // Count occurrences
  increment(name, labels = {}) {
    const key = this.makeKey(name, labels);
    this.counters[key] = (this.counters[key] || 0) + 1;
  }

  // Track distributions
  recordDuration(name, durationMs, labels = {}) {
    const key = this.makeKey(name, labels);
    if (!this.histograms[key]) {
      this.histograms[key] = [];
    }
    this.histograms[key].push(durationMs);
  }

  // Track current values
  setGauge(name, value, labels = {}) {
    const key = this.makeKey(name, labels);
    this.gauges[key] = value;
  }

  getMetrics() {
    return {
      counters: this.counters,
      histograms: Object.fromEntries(
        Object.entries(this.histograms).map(([k, v]) => [
          k,
          {
            count: v.length,
            avg: v.reduce((a, b) => a + b, 0) / v.length,
            p50: percentile(v, 50),
            p95: percentile(v, 95),
            p99: percentile(v, 99)
          }
        ])
      ),
      gauges: this.gauges
    };
  }
}
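getMetrics relies on a percentile helper that isn't shown above; a minimal nearest-rank sketch:

function percentile(values, p) {
  // Nearest-rank percentile over a sorted copy of the samples
  const sorted = [...values].sort((a, b) => a - b);
  const index = Math.ceil((p / 100) * sorted.length) - 1;
  return sorted[Math.min(sorted.length - 1, Math.max(0, index))];
}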
// Key metrics to track: step starts, completions, failures, and duration
const metrics = new ChainMetrics();

async function instrumentedStep(name, fn) {
  const start = Date.now();
  metrics.increment('chain.step.started', { step: name });
  try {
    const result = await fn();
    metrics.increment('chain.step.completed', { step: name });
    metrics.recordDuration('chain.step.duration', Date.now() - start, { step: name });
    return result;
  } catch (error) {
    metrics.increment('chain.step.failed', { step: name, error: error.name });
    throw error;
  }
}
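Every step then runs through the wrapper, so the step name appears as a label on each counter and duration. A usage sketch; summarize and buildReport are hypothetical step functions:

async function runPipeline(documentText) {
  // Hypothetical steps; any async function can be wrapped
  const summary = await instrumentedStep('summarize', () => summarize(documentText));
  const report = await instrumentedStep('report', () => buildReport(summary));

  console.log(metrics.getMetrics()); // counters, duration percentiles, gauges
  return report;
}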
Debugging Strategies
1. Replay Failed Runs
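The simplest replay setup persists every step's input and output during normal runs (the logger and tracer above already capture this), then re-executes a single step against the exact input it saw. A minimal sketch; the recordedRun shape and steps registry are assumptions rather than any specific library's API:

async function replayStep(recordedRun, stepName, steps) {
  // recordedRun.steps is assumed to map step names to captured inputs and outputs
  const recorded = recordedRun.steps[stepName];
  if (!recorded) {
    throw new Error(`No recorded data for step: ${stepName}`);
  }

  // Re-run only this step against the exact input it received in production
  const replayedOutput = await steps[stepName].execute(recorded.input);

  return {
    stepName,
    originalOutput: recorded.output,
    replayedOutput
  };
}

The original and replayed outputs can then be fed to the output comparison shown below.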
2. Step-by-Step Debugging
class ChainDebugger {
  constructor(chain) {
    this.chain = chain;
    this.breakpoints = new Set();
    this.snapshots = [];
  }

  setBreakpoint(stepName) {
    this.breakpoints.add(stepName);
  }

  // Placeholder hook: pause until the developer resumes execution.
  // In practice this might wait on stdin, a debugger UI, or a promise
  // resolved from the console.
  async waitForContinue() {
    return Promise.resolve();
  }

  async runWithDebug(input) {
    let currentInput = input;
    for (const step of this.chain.steps) {
      // Capture snapshot before step
      this.snapshots.push({
        step: step.name,
        input: structuredClone(currentInput),
        timestamp: Date.now()
      });

      // Check breakpoint
      if (this.breakpoints.has(step.name)) {
        console.log(`Breakpoint at ${step.name}`);
        console.log('Input:', JSON.stringify(currentInput, null, 2));
        await this.waitForContinue();
      }

      // Execute step
      try {
        currentInput = await step.execute(currentInput);
        // Capture output
        this.snapshots[this.snapshots.length - 1].output = structuredClone(currentInput);
      } catch (error) {
        this.snapshots[this.snapshots.length - 1].error = error;
        throw error;
      }
    }
    return currentInput;
  }

  getSnapshots() {
    return this.snapshots;
  }
}
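Assuming a chain object whose steps each expose name and execute (the shape runWithDebug expects), usage might look like this; documentChain, rawDocument, and the 'extract-entities' step name are hypothetical:

const chainDebugger = new ChainDebugger(documentChain);
chainDebugger.setBreakpoint('extract-entities'); // pause before this step

chainDebugger.runWithDebug(rawDocument)
  .then(output => console.log('Final output:', output))
  .catch(() => {
    // On failure, the snapshots show the exact input each step received
    console.dir(chainDebugger.getSnapshots(), { depth: null });
  });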
3. Output Comparison
async function compareOutputs(originalOutput, newOutput) {
  const prompt = `
Compare these two outputs and identify differences.

ORIGINAL OUTPUT:
${JSON.stringify(originalOutput, null, 2)}

NEW OUTPUT:
${JSON.stringify(newOutput, null, 2)}

Analyze:
1. What fields are different?
2. What values changed?
3. Are the differences significant?
4. Which output is more correct/complete?

Provide structured comparison.
`;

  return await llm.chat({ content: prompt });
}
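Combined with the replay sketch above, this turns regression debugging into a concrete diff; the variable names below come from that sketch:

// Inside an async debugging script:
const { originalOutput, replayedOutput } = await replayStep(recordedRun, 'extract-entities', steps);
const comparison = await compareOutputs(originalOutput, replayedOutput);
console.log(comparison);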
Alerting
Setting Up Alerts
class AlertManager {
  constructor(config) {
    this.thresholds = config.thresholds;
    this.channels = config.channels;
  }

  checkThresholds(metrics) {
    const alerts = [];

    // Error rate threshold
    const errorRate = metrics.errors / metrics.total;
    if (errorRate > this.thresholds.errorRate) {
      alerts.push({
        severity: 'critical',
        message: `Error rate ${(errorRate * 100).toFixed(1)}% exceeds threshold`,
        metric: 'error_rate',
        value: errorRate
      });
    }

    // Latency threshold
    if (metrics.p95Latency > this.thresholds.p95Latency) {
      alerts.push({
        severity: 'warning',
        message: `P95 latency ${metrics.p95Latency}ms exceeds threshold`,
        metric: 'p95_latency',
        value: metrics.p95Latency
      });
    }

    // Success rate drop
    if (metrics.successRate < this.thresholds.minSuccessRate) {
      alerts.push({
        severity: 'critical',
        message: `Success rate dropped to ${(metrics.successRate * 100).toFixed(1)}%`,
        metric: 'success_rate',
        value: metrics.successRate
      });
    }

    return alerts;
  }

  async sendAlerts(alerts) {
    for (const alert of alerts) {
      // Route each alert to the channels configured for its severity
      const channels = this.channels[alert.severity] || [];
      for (const channel of channels) {
        await channel.send(alert);
      }
    }
  }
}
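A configuration sketch, assuming hypothetical pagerChannel and slackChannel objects that each expose a send(alert) method; the threshold values are only illustrative:

const alertManager = new AlertManager({
  thresholds: {
    errorRate: 0.05,      // alert above 5% errors
    p95Latency: 30000,    // alert above 30s at p95
    minSuccessRate: 0.9   // alert below 90% success
  },
  channels: {
    critical: [pagerChannel, slackChannel],
    warning: [slackChannel]
  }
});

const alerts = alertManager.checkThresholds({
  errors: 12,
  total: 150,
  p95Latency: 42000,
  successRate: 0.88
});
alertManager.sendAlerts(alerts).catch(console.error);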
Exercise: Debug a Failing Chain
Key Takeaways
- Implement structured logging for all chain steps
- Use distributed tracing for complex chains
- Track key metrics: latency, error rate, token usage
- Build replay capability for debugging
- Set up alerts for threshold breaches
- Capture enough context to reproduce issues
- Compare outputs when debugging regressions
- Monitor for gradual degradation, not just failures
Next, we'll cover scaling chains for production.

