Custom Middleware
Build your own middleware from scratch.
Quick Start
1. Create Middleware Class
public class SimpleLoggingMiddleware : IAgentMiddleware
{
/// <summary>
/// Writes the iteration number and the current message count to the console
/// before each iteration starts.
/// </summary>
/// <param name="context">Iteration context; only <c>Iteration</c> and <c>Messages.Count</c> are read.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task.</returns>
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    var iteration = context.Iteration;
    var messageCount = context.Messages.Count;

    Console.WriteLine($"Iteration {iteration} starting with {messageCount} messages");

    return Task.CompletedTask;
}
}

2. Register
var agent = new AgentBuilder()
.WithMiddleware(new SimpleLoggingMiddleware())
.Build();

That's it! Implement only the hooks you need - all others have default no-op implementations.
Choosing Hooks
Match your use case to the right hook:
| Goal | Hook | Example |
|---|---|---|
| Inject context once | BeforeMessageTurnAsync | RAG, memory retrieval |
| Modify prompts per iteration | BeforeIterationAsync | Dynamic instructions |
| Retry LLM calls | WrapModelCallAsync | Exponential backoff |
| Count tokens | WrapModelCallStreamingAsync | Progressive counting |
| Validate tools | BeforeToolExecutionAsync | Circuit breaker |
| Check permissions | BeforeFunctionAsync | Human approval |
| Retry functions | WrapFunctionCallAsync | Network retry |
| Log results | AfterIterationAsync | Telemetry |
| Extract memory | AfterMessageTurnAsync | Conversation summary |
| Handle errors | OnErrorAsync | Centralized logging |
See 05.1 Middleware Lifecycle for complete hook reference.
Pattern 1: Simple Hook
No state, no events - just modify context.
public class DynamicInstructionsMiddleware : IAgentMiddleware
{
/// <summary>
/// Adjusts the request on later iterations: prepends a system hint once
/// <c>Iteration</c> is greater than 0, and lowers the sampling temperature
/// once it is greater than 1.
/// </summary>
/// <param name="context">Iteration context whose <c>Messages</c> and <c>Options</c> are modified in place.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task.</returns>
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    var isRetry = context.Iteration > 0;
    var isRepeatedRetry = context.Iteration > 1;

    // Any iteration after the first gets a system message at the front of the list.
    if (isRetry)
    {
        var retryHint = new ChatMessage(
            ChatRole.System,
            "Previous attempt failed. Try a different approach.");
        context.Messages.Insert(0, retryHint);
    }

    // From the third iteration onward, use a lower temperature.
    if (isRepeatedRetry)
    {
        context.Options.Temperature = 0.3f;
    }

    return Task.CompletedTask;
}
}

Pattern 2: Wrap Hook (Retry)
Wrap LLM or function calls with custom logic.
public class RetryMiddleware : IAgentMiddleware
{
private readonly int _maxRetries;
public RetryMiddleware(int maxRetries = 3)
{
_maxRetries = maxRetries;
}
/// <summary>
/// Invokes the model call and retries it when it throws
/// <see cref="HttpRequestException"/>, waiting 1s, 2s, 4s, ... between attempts.
/// </summary>
/// <param name="request">The request passed unchanged to every attempt.</param>
/// <param name="handler">The next step in the pipeline; called once per attempt.</param>
/// <param name="ct">Cancels the backoff delay between attempts.</param>
/// <returns>The response of the first attempt that completes without throwing.</returns>
/// <exception cref="HttpRequestException">
/// Propagated from the last attempt when every attempt fails.
/// </exception>
public async Task<ModelResponse> WrapModelCallAsync(
    ModelRequest request,
    Func<ModelRequest, Task<ModelResponse>> handler,
    CancellationToken ct)
{
    // The handler is always called at least once, even when the middleware
    // was configured with zero or a negative retry count.
    var maxAttempts = Math.Max(1, _maxRetries);

    for (var attempt = 1; ; attempt++)
    {
        try
        {
            return await handler(request);
        }
        // The filter lets the final failure escape untouched, so the caller
        // sees the original exception and stack trace.
        catch (HttpRequestException) when (attempt < maxAttempts)
        {
            // Exponential backoff: 2^0, 2^1, 2^2, ... seconds.
            var delay = TimeSpan.FromSeconds(Math.Pow(2, attempt - 1));
            await Task.Delay(delay, ct);
        }
    }
}
}

Pattern 3: State Management
Track data across iterations using typed state.
Step 1: Define state:
[MiddlewareState]
public sealed record TokenCountState
{
public int TotalTokens { get; init; }
}

Step 2: Use in middleware:
public class TokenCounterMiddleware : IAgentMiddleware
{
/// <summary>
/// Passes every streamed update through unchanged while adding up an
/// estimated token count for the text content it carries.
/// </summary>
/// <param name="request">The model request; its middleware state is read for the current token count.</param>
/// <param name="handler">The next step in the pipeline that produces the update stream.</param>
/// <param name="ct">Cancels enumeration of the underlying stream.</param>
/// <returns>The same updates the handler produced, in the same order.</returns>
public async IAsyncEnumerable<ChatResponseUpdate> WrapModelCallStreamingAsync(
    ModelRequest request,
    Func<ModelRequest, IAsyncEnumerable<ChatResponseUpdate>> handler,
    [EnumeratorCancellation] CancellationToken ct)
{
    // Only referenced by the commented-out state update at the end of the method.
    var state = request.State.MiddlewareState.TokenCount ?? new();
    var tokens = 0;

    await foreach (var update in handler(request).WithCancellation(ct))
    {
        if (update.Contents != null)
        {
            foreach (var content in update.Contents)
            {
                // Only text content contributes to the estimate.
                if (content is not TextContent text)
                {
                    continue;
                }

                tokens += EstimateTokens(text.Text);
            }
        }

        yield return update;
    }

    // Update state (needs context - see note below)
    // context.UpdateState(s => s with
    // {
    //     MiddlewareState = s.MiddlewareState.WithTokenCount(state with
    //     {
    //         TotalTokens = state.TotalTokens + tokens
    //     })
    // });
}
private int EstimateTokens(string text) => text.Length / 4;
}

Note: WrapModelCallStreamingAsync receives ModelRequest (not context). For state updates in Wrap hooks, emit an event or use BeforeIterationAsync/AfterIterationAsync instead.
See 05.2 Middleware State for full state guide.
Pattern 3.5: Persistent State (Cross-Run Caching)
Use Persistent = true for state that should survive across agent runs.
Step 1: Define persistent state:
[MiddlewareState(Persistent = true)]
public sealed record UserPreferencesState
{
public Dictionary<string, string> Settings { get; init; } = new();
public DateTime? LastUpdated { get; init; }
}

Step 2: Use in middleware:
public class UserPreferencesMiddleware : IAgentMiddleware
{
/// <summary>
/// Reads the stored "language" setting and, when one exists, prepends a
/// system message telling the model which language the user prefers.
/// </summary>
/// <param name="context">Turn context; its middleware state is read and its <c>Messages</c> may be modified.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task.</returns>
public Task BeforeMessageTurnAsync(BeforeMessageTurnContext context, CancellationToken ct)
{
    // Load preferences (automatically restored from previous runs)
    var preferences = context.GetMiddlewareState<UserPreferencesState>();
    var lang = preferences?.Settings.GetValueOrDefault("language");

    // Nothing stored yet: leave the conversation untouched.
    if (lang == null)
    {
        return Task.CompletedTask;
    }

    // Apply preferences to this run
    var languageHint = new ChatMessage(
        ChatRole.System,
        $"User prefers responses in {lang}");
    context.Messages.Insert(0, languageHint);

    return Task.CompletedTask;
}
/// <summary>
/// Stores a newly detected language preference, together with the time of
/// the update, in <c>UserPreferencesState</c>.
/// </summary>
/// <param name="context">Turn context; <c>FinalResponse</c> is inspected and middleware state may be updated.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task.</returns>
public Task AfterMessageTurnAsync(AfterMessageTurnContext context, CancellationToken ct)
{
    // Extract language preference from conversation
    var detected = DetectLanguagePreference(context.FinalResponse, out var newLang);
    if (!detected)
    {
        return Task.CompletedTask;
    }

    context.UpdateMiddlewareState<UserPreferencesState>(prefs =>
    {
        // Copy the dictionary instead of mutating the one held by the previous state.
        var updatedSettings = new Dictionary<string, string>(prefs.Settings)
        {
            ["language"] = newLang
        };

        return prefs with
        {
            Settings = updatedSettings,
            LastUpdated = DateTime.UtcNow
        };
    });

    return Task.CompletedTask;
}
/// <summary>
/// Placeholder for language detection. As written it never reports a
/// detection: it always returns <c>false</c>, so the caller's update branch
/// is never taken until real logic is supplied.
/// </summary>
/// <param name="message">The message to inspect; not read by this placeholder.</param>
/// <param name="language">Always set to "English" by this placeholder.</param>
/// <returns><c>false</c> in this placeholder implementation.</returns>
private bool DetectLanguagePreference(ChatMessage message, out string language)
{
// Detection logic...
// The out parameter must be assigned on every path, hence the fixed value.
language = "English";
return false;
}
}

When to use Persistent = true:
- Expensive caches - Summarization results, embeddings
- User preferences - Settings, permissions
- Long-term metrics - Total API calls, usage stats
When to use transient (default):
- Safety state - Error counts, circuit breakers
- Per-run tracking - Current iteration, temp data
Example: Expensive Cache
/// <summary>
/// Middleware state holding a cached conversation summary. It is declared
/// with <c>Persistent = true</c>, the setting this guide uses for state
/// that should survive across agent runs.
/// </summary>
[MiddlewareState(Persistent = true)]
public sealed record ConversationSummaryState
{
/// <summary>The cached summary text, or <c>null</c> when none has been stored.</summary>
public string? Summary { get; init; }
/// <summary>The message count recorded when <see cref="Summary"/> was last stored.</summary>
public int MessagesSummarized { get; init; }
}
public class SummaryMiddleware : IAgentMiddleware
{
/// <summary>
/// When a cached summary exists and the conversation has more than 100
/// messages, prepends the summary as a system message and removes the
/// 50 messages that follow it.
/// </summary>
/// <param name="context">Turn context; its middleware state is read and its <c>Messages</c> may be modified.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task; the hook does no asynchronous work.</returns>
public Task BeforeMessageTurnAsync(BeforeMessageTurnContext context, CancellationToken ct)
{
    var summary = context.GetMiddlewareState<ConversationSummaryState>()?.Summary;
    if (summary != null && context.Messages.Count > 100)
    {
        // Use cached summary instead of re-processing all messages
        context.Messages.Insert(0, new ChatMessage(
            ChatRole.System,
            $"Previous conversation summary: {summary}"));

        // Trim old messages. Index 0 is the summary just inserted, so removal starts at 1.
        context.Messages.RemoveRange(1, 50);
    }

    // Nothing is awaited here, so the method is not marked async.
    return Task.CompletedTask;
}
/// <summary>
/// Regenerates the conversation summary once the message count exceeds the
/// previously summarized count by more than 50, and stores the new summary
/// together with the current message count.
/// </summary>
/// <param name="context">Turn context; its <c>Messages</c> are summarized and its middleware state is replaced.</param>
/// <param name="ct">Forwarded to <c>GenerateSummary</c>.</param>
public async Task AfterMessageTurnAsync(AfterMessageTurnContext context, CancellationToken ct)
{
// Treat missing state as "nothing summarized yet".
var messagesSummarized = context.GetMiddlewareState<ConversationSummaryState>()?
.MessagesSummarized ?? 0;
if (context.Messages.Count > messagesSummarized + 50)
{
// Generate new summary (expensive LLM call)
// NOTE(review): GenerateSummary is not defined in this snippet - presumably supplied by the implementer.
var newSummary = await GenerateSummary(context.Messages, ct);
// The previous state is discarded (`_`) and replaced wholesale.
context.UpdateMiddlewareState<ConversationSummaryState>(_ => new()
{
Summary = newSummary,
MessagesSummarized = context.Messages.Count
});
// Summary persists to AgentSession automatically
// Next run reuses it instead of expensive re-summarization!
}
}
}

See 05.2 Middleware State for full persistence guide.
Pattern 4: Request/Response Events
Interactive middleware with user approval.
Step 1: Define events:
/// <summary>
/// Event emitted by the permission middleware to ask whether a function
/// may be executed.
/// </summary>
public class PermissionRequestEvent : AgentEvent
{
/// <summary>Name of the function awaiting approval.</summary>
public required string FunctionName { get; init; }
/// <summary>Identifier that the matching response event must carry back.</summary>
public required string RequestId { get; init; }
}
public class PermissionResponseEvent : AgentEvent
{
public required string RequestId { get; init; }
public required bool Approved { get; init; }
}

Step 2: Implement middleware:
public class PermissionMiddleware : IAgentMiddleware
{
/// <summary>
/// Asks for approval before a function runs. Emits a
/// <c>PermissionRequestEvent</c>, waits up to 30 seconds for the matching
/// <c>PermissionResponseEvent</c>, and blocks execution when the request is
/// denied or the wait times out.
/// </summary>
/// <param name="context">Function context used to emit the request, await the response and block execution.</param>
/// <param name="ct">Caller cancellation. When it fires, the cancellation propagates instead of being reported as a timeout.</param>
/// <exception cref="OperationCanceledException">Thrown when <paramref name="ct"/> is cancelled while waiting.</exception>
public async Task BeforeFunctionAsync(BeforeFunctionContext context, CancellationToken ct)
{
    var requestId = Guid.NewGuid().ToString();

    // Emit request
    context.Emit(new PermissionRequestEvent
    {
        FunctionName = context.Function.Name,
        RequestId = requestId
    });

    // Wait for response (with timeout). The linked source is cancelled either
    // by the caller's token or by the 30-second timer, whichever comes first.
    using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
    cts.CancelAfter(TimeSpan.FromSeconds(30));

    try
    {
        var response = await context.WaitForResponseAsync<PermissionResponseEvent>(
            requestId,
            cts.Token);

        if (!response.Approved)
        {
            context.BlockExecution = true;
            context.OverrideResult = "Permission denied";
        }
    }
    // Only the timer counts as a timeout. If the caller cancelled, the filter
    // is false and the exception propagates, so the run stops instead of
    // being misreported as a denied permission.
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Timeout - deny by default
        context.BlockExecution = true;
        context.OverrideResult = "Permission request timed out";
    }
}
}

Step 3: Handle in UI:
await foreach (var evt in agent.RunAsync("Do something", ct))
{
if (evt is PermissionRequestEvent req)
{
var approved = await ShowPermissionDialog(req.FunctionName);
await agent.EmitEventAsync(new PermissionResponseEvent
{
RequestId = req.RequestId,
Approved = approved
});
}
}

See 05.3 Middleware Events for full event guide.
Pattern 5: Multi-Hook Middleware
Use multiple hooks for complex logic.
public class ComprehensiveLoggingMiddleware : IAgentMiddleware
{
private readonly ILogger _logger;
public ComprehensiveLoggingMiddleware(ILogger logger)
{
_logger = logger;
}
public Task BeforeMessageTurnAsync(BeforeMessageTurnContext context, CancellationToken ct)
{
_logger.LogInformation("Turn started: {UserMessage}", context.UserMessage);
return Task.CompletedTask;
}
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
_logger.LogInformation("Iteration {Iteration}: {MessageCount} messages",
context.Iteration, context.Messages.Count);
return Task.CompletedTask;
}
public Task BeforeFunctionAsync(BeforeFunctionContext context, CancellationToken ct)
{
_logger.LogInformation("Calling function: {FunctionName}", context.Function.Name);
return Task.CompletedTask;
}
/// <summary>
/// Logs the outcome of a function call: an error entry when the context
/// carries an exception, otherwise an informational success entry.
/// </summary>
/// <param name="context">Function context; <c>Exception</c> and <c>Function.Name</c> are read.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task.</returns>
public Task AfterFunctionAsync(AfterFunctionContext context, CancellationToken ct)
{
    if (context.Exception != null)
    {
        // Pass the exception object itself so the log keeps the stack trace
        // and inner exceptions, not just the message text.
        _logger.LogError(
            context.Exception,
            "Function {Name} failed: {Error}",
            context.Function.Name,
            context.Exception.Message);
    }
    else
    {
        _logger.LogInformation("Function {Name} succeeded", context.Function.Name);
    }

    return Task.CompletedTask;
}
public Task AfterMessageTurnAsync(AfterMessageTurnContext context, CancellationToken ct)
{
_logger.LogInformation("Turn completed: {Response}", context.FinalResponse.Text);
return Task.CompletedTask;
}
/// <summary>
/// Logs an error entry containing the error source and the error message.
/// </summary>
/// <param name="context">Error context; <c>Source</c> and <c>Error.Message</c> are read.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task.</returns>
public Task OnErrorAsync(ErrorContext context, CancellationToken ct)
{
// NOTE(review): only the message text is logged. If context.Error is an Exception,
// passing it as the first LogError argument would also keep the stack trace - confirm the type.
_logger.LogError("Error in {Source}: {Error}",
context.Source, context.Error.Message);
return Task.CompletedTask;
}
}

Complete Example: Rate Limiter
Full-featured middleware with state, events, and multiple hooks.
State:
[MiddlewareState]
public sealed record RateLimitState
{
public DateTime? WindowStart { get; init; }
public int CallsInWindow { get; init; }
}

Events:
public class RateLimitExceededEvent : AgentEvent
{
public required int CallsInWindow { get; init; }
public required int MaxCalls { get; init; }
public required TimeSpan ResetIn { get; init; }
}

Middleware:
public class RateLimitMiddleware : IAgentMiddleware
{
public int MaxCallsPerMinute { get; set; } = 10;
/// <summary>
/// Enforces a per-minute call budget. Inside the current one-minute window
/// the call counter is incremented. When the budget is used up, a
/// <c>RateLimitExceededEvent</c> is emitted and the hook waits for the window
/// to end before starting a new one. Outside a window, a new one is started.
/// </summary>
/// <param name="context">Iteration context used to read and update <c>RateLimitState</c> and to emit events.</param>
/// <param name="ct">Cancels the wait for the window to reset.</param>
public async Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    var current = context.GetMiddlewareState<RateLimitState>();
    var startedAt = current?.WindowStart;
    var used = current?.CallsInWindow ?? 0;
    var now = DateTime.UtcNow;

    // No window yet, or the last one has expired: start a new window at "now".
    if (startedAt is not { } windowStart || (now - windowStart) >= TimeSpan.FromMinutes(1))
    {
        context.UpdateMiddlewareState<RateLimitState>(_ => new RateLimitState
        {
            WindowStart = now,
            CallsInWindow = 1
        });
        return;
    }

    // Still inside the window with budget left: increment count.
    if (used < MaxCallsPerMinute)
    {
        context.UpdateMiddlewareState<RateLimitState>(s => s with
        {
            CallsInWindow = s.CallsInWindow + 1
        });
        return;
    }

    // Budget exhausted: report it, then wait for the window to reset.
    var resetIn = TimeSpan.FromMinutes(1) - (now - windowStart);

    context.Emit(new RateLimitExceededEvent
    {
        CallsInWindow = used,
        MaxCalls = MaxCallsPerMinute,
        ResetIn = resetIn
    });

    await Task.Delay(resetIn, ct);

    // The new window starts when the wait ends, so the clock is read again.
    context.UpdateMiddlewareState<RateLimitState>(_ => new RateLimitState
    {
        WindowStart = DateTime.UtcNow,
        CallsInWindow = 1
    });
}
}

Best Practices
1. Use Typed Contexts
// GOOD: Typed context
/// <summary>
/// Example hook showing the typed context: <c>Messages</c> is available
/// directly on <c>BeforeIterationContext</c>.
/// </summary>
/// <param name="context">Iteration context whose message list is appended to.</param>
/// <param name="ct">Not used by this hook.</param>
/// <returns>An already-completed task.</returns>
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    // `systemMessage` stands in for a message built elsewhere in the middleware.
    context.Messages.Add(systemMessage); // Compile-time safe

    // A Task-returning method must return on every path.
    return Task.CompletedTask;
}
// BAD: No type safety without typed contexts

2. Handle Null State
// GOOD: Extension methods auto-instantiate
var count = context.GetMiddlewareState<MyState>()?.Count ?? 0;
context.UpdateMiddlewareState<MyState>(s => s with { Count = s.Count + 1 });
// ALSO GOOD: Advanced - use when updating core state too
context.UpdateState(s => {
var state = s.MiddlewareState.MyState ?? new();
return s with { /* ... */ };
});
// BAD: Would be NullReferenceException (but won't compile - context.State is internal)
// var count = context.State.MiddlewareState.MyState.Count;

3. Use Immutable Updates
// GOOD: Extension methods with 'with' expression
context.UpdateMiddlewareState<MyState>(s => s with { Count = s.Count + 1 });
// ALSO GOOD: Advanced approach for complex updates
context.UpdateState(s =>
{
var state = s.MiddlewareState.MyState ?? new();
return s with
{
MiddlewareState = s.MiddlewareState.WithMyState(state with { Count = state.Count + 1 })
};
});
// BAD: Would be mutation
// state.Count++; // Compile error - init-only property

4. Set Timeouts on WaitForResponseAsync
// GOOD: Timeout
using var cts = CancellationTokenSource.CreateLinkedTokenSource(ct);
cts.CancelAfter(TimeSpan.FromSeconds(30));
var response = await context.WaitForResponseAsync<MyEvent>(requestId, cts.Token);
// BAD: No timeout - could hang forever
var response = await context.WaitForResponseAsync<MyEvent>(requestId, ct);

5. Don't Use Instance Fields for State
// BAD: Race condition with parallel RunAsync calls
private int _count = 0;
/// <summary>
/// Anti-pattern example: counts iterations in an instance field. The
/// increment is unsynchronized, which is the problem this example is
/// meant to show.
/// </summary>
/// <param name="context">Not used by this example.</param>
/// <param name="ct">Not used by this example.</param>
/// <returns>An already-completed task.</returns>
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
    _count++; // Not thread-safe!

    // The return is needed for the example to compile; the race above is left in on purpose.
    return Task.CompletedTask;
}
// GOOD: Use middleware state extensions
public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
{
context.UpdateMiddlewareState<MyCounter>(s => s with { Count = s.Count + 1 });
return Task.CompletedTask;
}

6. Only Implement Hooks You Need
// GOOD: Only implement required hooks
/// <summary>
/// Minimal middleware that implements a single hook and leaves every other
/// hook to the interface's default no-op implementation.
/// </summary>
public class SimpleMiddleware : IAgentMiddleware
{
    /// <summary>The only hook this middleware needs.</summary>
    /// <param name="context">Iteration context; not used by this skeleton.</param>
    /// <param name="ct">Not used by this skeleton.</param>
    /// <returns>An already-completed task.</returns>
    public Task BeforeIterationAsync(BeforeIterationContext context, CancellationToken ct)
    {
        // Only hook we need
        return Task.CompletedTask;
    }

    // All other hooks use default no-op implementation
}
// BAD: Implementing unused hooks
public class VerboseMiddleware : IAgentMiddleware
{
public Task BeforeMessageTurnAsync(...) => Task.CompletedTask; // Unused
public Task AfterMessageTurnAsync(...) => Task.CompletedTask; // Unused
public Task BeforeIterationAsync(...) { /* Actual logic */ }
// etc...
}

Testing Middleware
[Fact]
public async Task TestRetryMiddleware()
{
var callCount = 0;
var middleware = new RetryMiddleware(maxRetries: 3);
var request = new ModelRequest { /* ... */ };
var response = await middleware.WrapModelCallAsync(
request,
async (req) =>
{
callCount++;
if (callCount < 3)
throw new HttpRequestException("Simulated error");
return new ModelResponse { /* success */ };
},
CancellationToken.None
);
Assert.Equal(3, callCount); // Retried twice, succeeded on 3rd
Assert.NotNull(response);
}

Next Steps
- 05.1 Middleware Lifecycle - Complete hook reference
- 05.2 Middleware State - State management patterns
- 05.3 Middleware Events - Event emission and handling
- 05.4 Built-in Middleware - Study production examples
- ../Getting Started/04 Middleware.md - Overview