Sanitize noisy MCP text fields by default

Clean control and invisible junk from tool result text fields to reduce token waste while preserving readable Unicode. Add an MCP_TEXT_SANITIZATION toggle and regression tests for enabled and disabled behavior.
This commit is contained in:
Jason Thistlethwaite
2026-05-06 02:31:25 -04:00
parent def9084981
commit 22c8e915e9
8 changed files with 162 additions and 5 deletions
+1
View File
@@ -1,2 +1,3 @@
REDMINE_URL=http://192.168.50.170 REDMINE_URL=http://192.168.50.170
REDMINE_API_KEY= REDMINE_API_KEY=
MCP_TEXT_SANITIZATION=true
+5
View File
@@ -298,6 +298,11 @@ and IDs. Authorization headers, bearer tokens, and Redmine API keys are not
logged. MCP tool output also redacts credential fields returned by Redmine, such logged. MCP tool output also redacts credential fields returned by Redmine, such
as `api_key`. as `api_key`.
Tool output text sanitization is enabled by default to reduce token waste from
invisible and control characters in fetched issue text. The cleanup preserves
readable Unicode, collapses long runs of spaces, blank lines, and repeated
punctuation, and only touches text fields such as `description`, `notes`,
`content`, and message body text. Set `MCP_TEXT_SANITIZATION=false` to disable it.
Example stdio client configuration: Example stdio client configuration:
```json ```json
+72 -2
View File
@@ -37,11 +37,13 @@ final class McpDispatcher
private RedmineClient $redmine; private RedmineClient $redmine;
private McpDebugLogger $logger; private McpDebugLogger $logger;
private bool $sanitizeToolText;
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null) public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null, bool $sanitizeToolText = true)
{ {
$this->redmine = $redmine; $this->redmine = $redmine;
$this->logger = $logger ?? new McpDebugLogger(null); $this->logger = $logger ?? new McpDebugLogger(null);
$this->sanitizeToolText = $sanitizeToolText;
} }
/** /**
@@ -471,7 +473,12 @@ final class McpDispatcher
throw new RuntimeException('Unknown tool: ' . $name); throw new RuntimeException('Unknown tool: ' . $name);
} }
$encoded = json_encode($this->redactSensitive($result), JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES); $prepared = $this->redactSensitive($result);
if ($this->sanitizeToolText) {
$prepared = $this->sanitizeToolResult($prepared);
}
$encoded = json_encode($prepared, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
if ($encoded === false) { if ($encoded === false) {
throw new RuntimeException('Could not encode tool result.'); throw new RuntimeException('Could not encode tool result.');
} }
@@ -758,4 +765,67 @@ final class McpDispatcher
'token', 'token',
], true); ], true);
} }
/**
 * Recursively walks a tool result and cleans the strings stored under
 * text-bearing keys.
 *
 * Arrays are rebuilt entry by entry with their keys preserved. A string is
 * cleaned only when the key it sits under is accepted by
 * shouldSanitizeTextKey(); integer keys are handed down as '' and so never
 * qualify. Every other value is returned untouched.
 *
 * @param mixed  $value Node of the result tree to process.
 * @param string $key   Array key $value was found under ('' at the root and for integer keys).
 *
 * @return mixed The node with qualifying strings cleaned.
 */
private function sanitizeToolResult($value, string $key = '')
{
    if (is_array($value)) {
        $clean = [];
        foreach ($value as $name => $item) {
            // Only string keys can identify a text field.
            $clean[$name] = $this->sanitizeToolResult($item, is_string($name) ? $name : '');
        }

        return $clean;
    }

    $isTextField = is_string($value) && $this->shouldSanitizeTextKey($key);

    return $isTextField ? $this->sanitizeText($value) : $value;
}
/**
 * Tells whether strings stored under the given array key should be cleaned.
 *
 * The key is trimmed and lower-cased, then looked up in a fixed set of
 * text-bearing field names. An empty key never matches.
 */
private function shouldSanitizeTextKey(string $key): bool
{
    $textFields = [
        'description' => true,
        'notes' => true,
        'content' => true,
        'body' => true,
        'text' => true,
        'message' => true,
        'message_body' => true,
        'message_text' => true,
        'plain_text' => true,
        'plain_body' => true,
        'html_body' => true,
    ];

    $candidate = strtolower(trim($key));

    return $candidate !== '' && isset($textFields[$candidate]);
}
/**
 * Strips noise from a text field while keeping its readable content.
 *
 * Steps, applied in order:
 *  1. CRLF and lone CR become LF.
 *  2. ASCII control characters are removed (tab and LF are kept).
 *  3. Invisible Unicode format characters (category Cf: zero-width space,
 *     BOM, bidi marks, ...) are removed, except ZWNJ (U+200C) and ZWJ
 *     (U+200D). Those two joiners carry meaning: removing them splits emoji
 *     ZWJ sequences into separate emoji and changes spelling in scripts
 *     such as Persian.
 *  4. Runs of three or more non-newline whitespace characters become one space.
 *  5. Runs of four or more newlines are capped at three.
 *  6. A punctuation character repeated eight or more times is cut to six.
 *
 * Every regex step is best effort: if preg_replace() returns null (for
 * example on malformed UTF-8), the text from the previous step is kept.
 *
 * @param string $value Raw field text.
 *
 * @return string Cleaned text.
 */
private function sanitizeText(string $value): string
{
    $value = str_replace(["\r\n", "\r"], "\n", $value);

    $steps = [
        '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/u' => '',
        // Any Cf character that is not one of the two meaningful joiners.
        '/(?:(?![\x{200C}\x{200D}])\p{Cf})+/u' => '',
        '/[^\S\n]{3,}/u' => ' ',
        '/\n{4,}/u' => "\n\n\n",
        '/([[:punct:]])\1{7,}/u' => '$1$1$1$1$1$1',
    ];

    foreach ($steps as $pattern => $replacement) {
        $value = preg_replace($pattern, $replacement, $value) ?? $value;
    }

    return $value;
}
} }
+23 -1
View File
@@ -9,7 +9,7 @@ use RuntimeException;
final class McpEnvironment final class McpEnvironment
{ {
/** /**
* @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string} * @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string,mcp_text_sanitization:bool}
*/ */
public static function load(string $envFile): array public static function load(string $envFile): array
{ {
@@ -24,6 +24,7 @@ final class McpEnvironment
'redmine_api_key' => $apiKey, 'redmine_api_key' => $apiKey,
'mcp_server_token' => self::optionalString(getenv('MCP_SERVER_TOKEN') ?: ($env['MCP_SERVER_TOKEN'] ?? null)), 'mcp_server_token' => self::optionalString(getenv('MCP_SERVER_TOKEN') ?: ($env['MCP_SERVER_TOKEN'] ?? null)),
'mcp_debug_log' => self::optionalString(getenv('MCP_DEBUG_LOG') ?: ($env['MCP_DEBUG_LOG'] ?? null)), 'mcp_debug_log' => self::optionalString(getenv('MCP_DEBUG_LOG') ?: ($env['MCP_DEBUG_LOG'] ?? null)),
'mcp_text_sanitization' => self::boolSetting(getenv('MCP_TEXT_SANITIZATION') ?: ($env['MCP_TEXT_SANITIZATION'] ?? null), true),
]; ];
} }
@@ -57,4 +58,25 @@ final class McpEnvironment
return $value; return $value;
} }
/**
 * Interprets a textual on/off setting.
 *
 * The value is trimmed and lower-cased. '1', 'true', 'yes' and 'on' yield
 * true; '0', 'false', 'no' and 'off' yield false. Non-string values, empty
 * strings and unrecognised words fall back to $default.
 *
 * NOTE(review): the call site that feeds this method picks the value with
 * `?:`, so an environment value of '0' is treated as absent before it gets
 * here. Confirm whether that is intended.
 *
 * @param mixed $value   Raw setting value, typically a string or null.
 * @param bool  $default Result used when $value carries no usable answer.
 */
private static function boolSetting(mixed $value, bool $default): bool
{
    if (!is_string($value)) {
        return $default;
    }

    return match (strtolower(trim($value))) {
        '1', 'true', 'yes', 'on' => true,
        '0', 'false', 'no', 'off' => false,
        default => $default,
    };
}
} }
+2 -1
View File
@@ -22,7 +22,8 @@ if ($token === null) {
$handler = new McpHttpHandler( $handler = new McpHttpHandler(
new McpDispatcher( new McpDispatcher(
RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']), RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']),
new McpDebugLogger($env['mcp_debug_log']) new McpDebugLogger($env['mcp_debug_log']),
$env['mcp_text_sanitization']
), ),
$token, $token,
getenv('MCP_HTTP_PATH') ?: '/mcp' getenv('MCP_HTTP_PATH') ?: '/mcp'
+2 -1
View File
@@ -15,7 +15,8 @@ $env = McpEnvironment::load(__DIR__ . '/../.env');
$server = new McpStdioServer( $server = new McpStdioServer(
new McpDispatcher( new McpDispatcher(
RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']), RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']),
new McpDebugLogger($env['mcp_debug_log']) new McpDebugLogger($env['mcp_debug_log']),
$env['mcp_text_sanitization']
) )
); );
$server->run(); $server->run();
+56
View File
@@ -78,6 +78,8 @@ final class RedmineStructureTest
$this->testMcpFindProjectRecommendsExactIdentifier(); $this->testMcpFindProjectRecommendsExactIdentifier();
$this->testMcpFindProjectRecommendsExactName(); $this->testMcpFindProjectRecommendsExactName();
$this->testMcpFindProjectLeavesAmbiguousMatchesUnrecommended(); $this->testMcpFindProjectLeavesAmbiguousMatchesUnrecommended();
$this->testMcpSearchSanitizesNoisyTextFields();
$this->testMcpSearchCanDisableTextSanitization();
$this->testCreateRelationDefaultsToRelatesAndRequiresTarget(); $this->testCreateRelationDefaultsToRelatesAndRequiresTarget();
$this->testAttachmentUploadSupportsPathAndBase64(); $this->testAttachmentUploadSupportsPathAndBase64();
$this->testAttachmentUploadAcceptsPdfDataUrl(); $this->testAttachmentUploadAcceptsPdfDataUrl();
@@ -239,6 +241,49 @@ final class RedmineStructureTest
$this->assertSame('quality-archive', $result['matches'][1]['identifier'], 'second ambiguous match is returned'); $this->assertSame('quality-archive', $result['matches'][1]['identifier'], 'second ambiguous match is returned');
} }
/**
 * With the default dispatcher configuration, noisy `description` and `notes`
 * values returned by a search are cleaned: control and zero-width characters
 * are gone, blank-line and punctuation runs are capped, and accented text
 * survives.
 */
private function testMcpSearchSanitizesNoisyTextFields(): void
{
    $http = new RecordingClient();
    $http->queueJson([
        'results' => [[
            'title' => 'Ticket result',
            'description' => "Caf\u{00E9}\u{200B} issue\x07 !!!!!!!!!!\n\n\n\nDone",
            'notes' => "Agent\u{FEFF} note\x1F........",
        ]],
    ]);

    // Third constructor argument omitted on purpose: sanitization defaults to on.
    $dispatcher = new McpDispatcher(new RedmineClient($http));
    $result = $this->callToolJson($dispatcher, 'redmine_search', ['query' => 'ticket']);

    $description = (string) $result['results'][0]['description'];
    $notes = (string) $result['results'][0]['notes'];

    $this->assertStringContains('Café issue', $description, 'sanitizer preserves readable unicode content');
    $this->assertNotStringContains("\x07", $description, 'sanitizer removes control characters from description');
    $this->assertNotStringContains("\u{200B}", $description, 'sanitizer removes zero-width characters from description');
    $this->assertNotStringContains('!!!!!!!!!!', $description, 'sanitizer caps excessive repeated punctuation in description');
    $this->assertNotStringContains("\n\n\n\n", $description, 'sanitizer caps excessive blank lines in description');

    $this->assertNotStringContains("\x1F", $notes, 'sanitizer removes control characters from notes');
    $this->assertNotStringContains("\u{FEFF}", $notes, 'sanitizer removes byte-order marks from notes');
    // The needle must be no longer than the run in the fixture, otherwise the
    // check passes even when nothing was sanitized. Runs are capped at six,
    // so seven in a row must be absent while six remain.
    $this->assertStringContains('......', $notes, 'sanitizer keeps a capped run of repeated punctuation in notes');
    $this->assertNotStringContains('.......', $notes, 'sanitizer caps excessive repeated punctuation in notes');
}
/**
 * A dispatcher built with sanitization switched off returns the description
 * exactly as it was delivered, noise included.
 */
private function testMcpSearchCanDisableTextSanitization(): void
{
    $transport = new RecordingClient();
    $transport->queueJson([
        'results' => [
            ['description' => "Raw\u{200B} text\x07 !!!!!!!!!!"],
        ],
    ]);

    // Third constructor argument false = leave tool text untouched.
    $rawDispatcher = new McpDispatcher(new RedmineClient($transport), null, false);
    $payload = $this->callToolJson($rawDispatcher, 'redmine_search', ['query' => 'ticket']);
    $text = (string) $payload['results'][0]['description'];

    $this->assertStringContains("\u{200B}", $text, 'sanitization toggle off keeps zero-width characters untouched');
    $this->assertStringContains("\x07", $text, 'sanitization toggle off keeps control characters untouched');
    $this->assertStringContains('!!!!!!!!!!', $text, 'sanitization toggle off keeps repeated punctuation untouched');
}
private function testCreateRelationDefaultsToRelatesAndRequiresTarget(): void private function testCreateRelationDefaultsToRelatesAndRequiresTarget(): void
{ {
$http = new RecordingClient(); $http = new RecordingClient();
@@ -500,6 +545,17 @@ final class RedmineStructureTest
exit(1); exit(1);
} }
/**
 * Counts one assertion and passes when $needle does not occur in $haystack.
 * Otherwise writes a failure report to STDERR and ends the run with exit
 * status 1.
 */
private function assertNotStringContains(string $needle, string $haystack, string $message): void
{
    $this->assertions++;

    $found = strpos($haystack, $needle) !== false;
    if ($found) {
        fwrite(STDERR, "FAIL: {$message}\nUnexpected needle: {$needle}\nHaystack: {$haystack}\n");
        exit(1);
    }
}
/** /**
* @param array<int,string> $haystack * @param array<int,string> $haystack
*/ */
@@ -10,6 +10,7 @@ Required environment:
```text ```text
REDMINE_URL=http://redmine.example.test REDMINE_URL=http://redmine.example.test
REDMINE_API_KEY=... REDMINE_API_KEY=...
MCP_TEXT_SANITIZATION=true
``` ```
For Streamable HTTP MCP: For Streamable HTTP MCP: