Sanitize noisy MCP text fields by default

Clean control and invisible junk from tool result text fields to reduce token waste while preserving readable Unicode. Add an MCP_TEXT_SANITIZATION toggle and regression tests for enabled and disabled behavior.
This commit is contained in:
Jason Thistlethwaite
2026-05-06 02:31:25 -04:00
parent def9084981
commit 22c8e915e9
8 changed files with 162 additions and 5 deletions
+1
View File
@@ -1,2 +1,3 @@
REDMINE_URL=http://192.168.50.170
REDMINE_API_KEY=
MCP_TEXT_SANITIZATION=true
+5
View File
@@ -298,6 +298,11 @@ and IDs. Authorization headers, bearer tokens, and Redmine API keys are not
logged. MCP tool output also redacts credential fields returned by Redmine, such
as `api_key`.
Tool output text sanitization is enabled by default to reduce token waste from
invisible/control junk in fetched issue text. This cleanup preserves readable
Unicode and targets fields such as `description`, `notes`, `content`, and
message body text. Set `MCP_TEXT_SANITIZATION=false` (or `no`/`off`) to disable it;
unrecognized or empty values keep the default (enabled).
Example stdio client configuration:
```json
+72 -2
View File
@@ -37,11 +37,13 @@ final class McpDispatcher
private RedmineClient $redmine;
private McpDebugLogger $logger;
private bool $sanitizeToolText;
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null)
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null, bool $sanitizeToolText = true)
{
$this->redmine = $redmine;
$this->logger = $logger ?? new McpDebugLogger(null);
$this->sanitizeToolText = $sanitizeToolText;
}
/**
@@ -471,7 +473,12 @@ final class McpDispatcher
throw new RuntimeException('Unknown tool: ' . $name);
}
$encoded = json_encode($this->redactSensitive($result), JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
$prepared = $this->redactSensitive($result);
if ($this->sanitizeToolText) {
$prepared = $this->sanitizeToolResult($prepared);
}
$encoded = json_encode($prepared, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
if ($encoded === false) {
throw new RuntimeException('Could not encode tool result.');
}
@@ -758,4 +765,67 @@ final class McpDispatcher
'token',
], true);
}
/**
 * Recursively walk a tool result and clean the strings stored under text-bearing keys.
 *
 * Arrays are rebuilt with their original keys. Each child is visited with its own
 * key name; integer (list) keys are passed down as an empty name, so strings held
 * in plain lists are returned untouched. Values that are neither strings nor arrays
 * are returned as-is.
 *
 * @param mixed  $value Any decoded tool result value.
 * @param string $key   Name of the key $value was found under ('' when unknown).
 *
 * @return mixed The same structure with eligible strings sanitized.
 */
private function sanitizeToolResult($value, string $key = '')
{
    if (is_array($value)) {
        $cleaned = [];
        foreach ($value as $name => $item) {
            // Only string keys can identify a text field; list positions never do.
            $itemKey = is_string($name) ? $name : '';
            $cleaned[$name] = $this->sanitizeToolResult($item, $itemKey);
        }

        return $cleaned;
    }

    if (is_string($value) && $this->shouldSanitizeTextKey($key)) {
        return $this->sanitizeText($value);
    }

    return $value;
}
/**
 * Decide whether a result key names a free-text field that should be sanitized.
 *
 * The key is trimmed and lower-cased before being compared against a fixed list
 * of text-field names. An empty key never matches.
 */
private function shouldSanitizeTextKey(string $key): bool
{
    $textKeys = [
        'description',
        'notes',
        'content',
        'body',
        'text',
        'message',
        'message_body',
        'message_text',
        'plain_text',
        'plain_body',
        'html_body',
    ];

    $candidate = strtolower(trim($key));

    return $candidate !== '' && in_array($candidate, $textKeys, true);
}
/**
 * Strip invisible/control noise from a text field and cap runaway repetition.
 *
 * Steps, in order:
 *  1. Normalize CRLF / CR line endings to LF.
 *  2. Remove ASCII control characters except tab and newline.
 *  3. Remove Unicode format characters (category Cf: zero-width space, BOM,
 *     bidi marks, ...) but keep U+200C ZERO WIDTH NON-JOINER and U+200D ZERO
 *     WIDTH JOINER, which are part of readable text (emoji ZWJ sequences,
 *     Persian and Indic orthography).
 *  4. Collapse runs of 3+ non-newline whitespace characters.
 *  5. Cap runs of 4+ newlines at three.
 *  6. Cap a punctuation character repeated 8+ times at six.
 *
 * Each regex step falls back to its input when PCRE reports an error (for
 * example on malformed UTF-8), so this method never returns null.
 *
 * @param string $value Raw field text.
 *
 * @return string Cleaned text.
 */
private function sanitizeText(string $value): string
{
    $value = str_replace(["\r\n", "\r"], "\n", $value);

    // Deliberately byte-mode (no /u): the class is pure ASCII and can never match
    // part of a multi-byte UTF-8 sequence, and without /u this pass still runs
    // when the input is not valid UTF-8.
    $value = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/', '', $value) ?? $value;

    // The negative lookahead exempts ZWNJ/ZWJ; removing them would split emoji
    // sequences and change the rendering of scripts that rely on joiners.
    $value = preg_replace('/(?:(?![\x{200C}\x{200D}])\p{Cf})+/u', '', $value) ?? $value;

    $value = preg_replace('/[^\S\n]{3,}/u', ' ', $value) ?? $value;
    $value = preg_replace('/\n{4,}/u', "\n\n\n", $value) ?? $value;
    $value = preg_replace('/([[:punct:]])\1{7,}/u', '$1$1$1$1$1$1', $value) ?? $value;

    return $value;
}
}
+23 -1
View File
@@ -9,7 +9,7 @@ use RuntimeException;
final class McpEnvironment
{
/**
* @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string}
* @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string,mcp_text_sanitization:bool}
*/
public static function load(string $envFile): array
{
@@ -24,6 +24,7 @@ final class McpEnvironment
'redmine_api_key' => $apiKey,
'mcp_server_token' => self::optionalString(getenv('MCP_SERVER_TOKEN') ?: ($env['MCP_SERVER_TOKEN'] ?? null)),
'mcp_debug_log' => self::optionalString(getenv('MCP_DEBUG_LOG') ?: ($env['MCP_DEBUG_LOG'] ?? null)),
'mcp_text_sanitization' => self::boolSetting(getenv('MCP_TEXT_SANITIZATION') ?: ($env['MCP_TEXT_SANITIZATION'] ?? null), true),
];
}
@@ -57,4 +58,25 @@ final class McpEnvironment
return $value;
}
/**
 * Interpret a loosely-typed configuration value as a boolean.
 *
 * Recognized (case-insensitive, surrounding whitespace ignored):
 *  - true:  "1", "true", "yes", "on"
 *  - false: "0", "false", "no", "off"
 *
 * Anything else, including non-strings, the empty string and unrecognized
 * words, yields $default.
 *
 * NOTE(review): the visible caller picks the raw value with `getenv(...) ?: ...`;
 * a process-environment value of "0" is falsy there and is replaced by the .env
 * entry before it reaches this method.
 *
 * @param mixed $value   Raw setting value.
 * @param bool  $default Result when $value is missing or unrecognized.
 */
private static function boolSetting(mixed $value, bool $default): bool
{
    $token = is_string($value) ? strtolower(trim($value)) : '';

    return match ($token) {
        '1', 'true', 'yes', 'on' => true,
        '0', 'false', 'no', 'off' => false,
        default => $default,
    };
}
}
+2 -1
View File
@@ -22,7 +22,8 @@ if ($token === null) {
$handler = new McpHttpHandler(
new McpDispatcher(
RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']),
new McpDebugLogger($env['mcp_debug_log'])
new McpDebugLogger($env['mcp_debug_log']),
$env['mcp_text_sanitization']
),
$token,
getenv('MCP_HTTP_PATH') ?: '/mcp'
+2 -1
View File
@@ -15,7 +15,8 @@ $env = McpEnvironment::load(__DIR__ . '/../.env');
$server = new McpStdioServer(
new McpDispatcher(
RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']),
new McpDebugLogger($env['mcp_debug_log'])
new McpDebugLogger($env['mcp_debug_log']),
$env['mcp_text_sanitization']
)
);
$server->run();
+56
View File
@@ -78,6 +78,8 @@ final class RedmineStructureTest
$this->testMcpFindProjectRecommendsExactIdentifier();
$this->testMcpFindProjectRecommendsExactName();
$this->testMcpFindProjectLeavesAmbiguousMatchesUnrecommended();
$this->testMcpSearchSanitizesNoisyTextFields();
$this->testMcpSearchCanDisableTextSanitization();
$this->testCreateRelationDefaultsToRelatesAndRequiresTarget();
$this->testAttachmentUploadSupportsPathAndBase64();
$this->testAttachmentUploadAcceptsPdfDataUrl();
@@ -239,6 +241,49 @@ final class RedmineStructureTest
$this->assertSame('quality-archive', $result['matches'][1]['identifier'], 'second ambiguous match is returned');
}
/**
 * With the default dispatcher (sanitization on), noisy `description` and `notes`
 * fields come back without control characters, zero-width/BOM characters,
 * over-long blank-line runs or over-long punctuation runs, while accented text
 * is preserved.
 */
private function testMcpSearchSanitizesNoisyTextFields(): void
{
    // The same run is used in the fixture and in the assertion so the two cannot
    // drift apart; a needle longer than the fixture's run would pass vacuously.
    $dotRun = str_repeat('.', 8);

    $http = new RecordingClient();
    $http->queueJson([
        'results' => [[
            'title' => 'Ticket result',
            'description' => "Caf\u{00E9}\u{200B} issue\x07 !!!!!!!!!!\n\n\n\nDone",
            'notes' => "Agent\u{FEFF} note\x1F" . $dotRun,
        ]],
    ]);

    $dispatcher = new McpDispatcher(new RedmineClient($http));
    $result = $this->callToolJson($dispatcher, 'redmine_search', ['query' => 'ticket']);
    $description = (string) $result['results'][0]['description'];
    $notes = (string) $result['results'][0]['notes'];

    $this->assertStringContains('Café issue', $description, 'sanitizer preserves readable unicode content');
    $this->assertNotStringContains("\x07", $description, 'sanitizer removes control characters from description');
    $this->assertNotStringContains("\u{200B}", $description, 'sanitizer removes zero-width characters from description');
    $this->assertNotStringContains('!!!!!!!!!!', $description, 'sanitizer caps excessive repeated punctuation in description');
    $this->assertNotStringContains("\n\n\n\n", $description, 'sanitizer caps excessive blank lines in description');
    $this->assertNotStringContains("\x1F", $notes, 'sanitizer removes control characters from notes');
    $this->assertNotStringContains("\u{FEFF}", $notes, 'sanitizer removes byte-order marks from notes');
    $this->assertNotStringContains($dotRun, $notes, 'sanitizer caps excessive repeated punctuation in notes');
}
/**
 * When the dispatcher is constructed with sanitization turned off, the noisy
 * `description` text is passed through unchanged.
 */
private function testMcpSearchCanDisableTextSanitization(): void
{
    $client = new RecordingClient();
    $client->queueJson([
        'results' => [
            ['description' => "Raw\u{200B} text\x07 !!!!!!!!!!"],
        ],
    ]);

    // Third constructor argument is the sanitizeToolText switch.
    $rawDispatcher = new McpDispatcher(new RedmineClient($client), null, false);
    $payload = $this->callToolJson($rawDispatcher, 'redmine_search', ['query' => 'ticket']);
    $text = (string) $payload['results'][0]['description'];

    $this->assertStringContains("\u{200B}", $text, 'sanitization toggle off keeps zero-width characters untouched');
    $this->assertStringContains("\x07", $text, 'sanitization toggle off keeps control characters untouched');
    $this->assertStringContains('!!!!!!!!!!', $text, 'sanitization toggle off keeps repeated punctuation untouched');
}
private function testCreateRelationDefaultsToRelatesAndRequiresTarget(): void
{
$http = new RecordingClient();
@@ -500,6 +545,17 @@ final class RedmineStructureTest
exit(1);
}
/**
 * Fail the run when $needle occurs anywhere in $haystack.
 *
 * Counts as one assertion. On failure the message, needle and haystack are
 * written to STDERR and the process exits with status 1.
 */
private function assertNotStringContains(string $needle, string $haystack, string $message): void
{
    $this->assertions++;

    if (strpos($haystack, $needle) !== false) {
        fwrite(STDERR, "FAIL: {$message}\nUnexpected needle: {$needle}\nHaystack: {$haystack}\n");
        exit(1);
    }
}
/**
* @param array<int,string> $haystack
*/
@@ -10,6 +10,7 @@ Required environment:
```text
REDMINE_URL=http://redmine.example.test
REDMINE_API_KEY=...
MCP_TEXT_SANITIZATION=true
```
For Streamable HTTP MCP: