From 22c8e915e9d9eb16457592fc0f0e8bc2cd333e94 Mon Sep 17 00:00:00 2001 From: Jason Thistlethwaite Date: Wed, 6 May 2026 02:31:25 -0400 Subject: [PATCH] Sanitize noisy MCP text fields by default Clean control and invisible junk from tool result text fields to reduce token waste while preserving readable Unicode. Add an MCP_TEXT_SANITIZATION toggle and regression tests for enabled and disabled behavior. --- redMCP/.env.example | 1 + redMCP/README.md | 5 ++ redMCP/app/McpDispatcher.php | 74 ++++++++++++++++++- redMCP/app/McpEnvironment.php | 24 +++++- redMCP/app/mcp-http-router.php | 3 +- redMCP/bin/redmcp-server.php | 3 +- redMCP/bin/test-redmine-structure.php | 56 ++++++++++++++ .../references/redmcp-tools.md | 1 + 8 files changed, 162 insertions(+), 5 deletions(-) diff --git a/redMCP/.env.example b/redMCP/.env.example index 8c20494..a98fa10 100644 --- a/redMCP/.env.example +++ b/redMCP/.env.example @@ -1,2 +1,3 @@ REDMINE_URL=http://192.168.50.170 REDMINE_API_KEY= +MCP_TEXT_SANITIZATION=true diff --git a/redMCP/README.md b/redMCP/README.md index b843afa..5dd348f 100644 --- a/redMCP/README.md +++ b/redMCP/README.md @@ -298,6 +298,11 @@ and IDs. Authorization headers, bearer tokens, and Redmine API keys are not logged. MCP tool output also redacts credential fields returned by Redmine, such as `api_key`. +Tool output text sanitization is enabled by default to reduce token waste from +invisible/control junk in fetched issue text. This cleanup preserves readable +Unicode and targets fields such as `description`, `notes`, `content`, and +message body text. Set `MCP_TEXT_SANITIZATION=false` to disable it. 
+ Example stdio client configuration: ```json diff --git a/redMCP/app/McpDispatcher.php b/redMCP/app/McpDispatcher.php index 982da4a..72ffafc 100644 --- a/redMCP/app/McpDispatcher.php +++ b/redMCP/app/McpDispatcher.php @@ -37,11 +37,13 @@ final class McpDispatcher private RedmineClient $redmine; private McpDebugLogger $logger; + private bool $sanitizeToolText; - public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null) + public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null, bool $sanitizeToolText = true) { $this->redmine = $redmine; $this->logger = $logger ?? new McpDebugLogger(null); + $this->sanitizeToolText = $sanitizeToolText; } /** @@ -471,7 +473,12 @@ final class McpDispatcher throw new RuntimeException('Unknown tool: ' . $name); } - $encoded = json_encode($this->redactSensitive($result), JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES); + $prepared = $this->redactSensitive($result); + if ($this->sanitizeToolText) { + $prepared = $this->sanitizeToolResult($prepared); + } + + $encoded = json_encode($prepared, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES); if ($encoded === false) { throw new RuntimeException('Could not encode tool result.'); } @@ -758,4 +765,67 @@ final class McpDispatcher 'token', ], true); } + + /** + * @param mixed $value + * + * @return mixed + */ + private function sanitizeToolResult($value, string $key = '') + { + if (is_string($value)) { + if (!$this->shouldSanitizeTextKey($key)) { + return $value; + } + + return $this->sanitizeText($value); + } + if (!is_array($value)) { + return $value; + } + + $sanitized = []; + foreach ($value as $childKey => $childValue) { + $sanitized[$childKey] = $this->sanitizeToolResult( + $childValue, + is_string($childKey) ? 
$childKey : '' + ); + } + + return $sanitized; + } + + private function shouldSanitizeTextKey(string $key): bool + { + $normalized = strtolower(trim($key)); + if ($normalized === '') { + return false; + } + + return in_array($normalized, [ + 'description', + 'notes', + 'content', + 'body', + 'text', + 'message', + 'message_body', + 'message_text', + 'plain_text', + 'plain_body', + 'html_body', + ], true); + } + + private function sanitizeText(string $value): string + { + $value = str_replace(["\r\n", "\r"], "\n", $value); + $value = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/u', '', $value) ?? $value; + $value = preg_replace('/[^\P{Cf}\x{200C}\x{200D}]+/u', '', $value) ?? $value; /* strip format characters except ZWNJ/ZWJ, which shape Persian/Indic text and join emoji sequences */ + $value = preg_replace('/[^\S\n]{3,}/u', ' ', $value) ?? $value; + $value = preg_replace('/\n{4,}/u', "\n\n\n", $value) ?? $value; + $value = preg_replace('/([[:punct:]])\1{7,}/u', '$1$1$1$1$1$1', $value) ?? $value; + + return $value; + } } diff --git a/redMCP/app/McpEnvironment.php b/redMCP/app/McpEnvironment.php index e2f177f..12d40ed 100644 --- a/redMCP/app/McpEnvironment.php +++ b/redMCP/app/McpEnvironment.php @@ -9,7 +9,7 @@ use RuntimeException; final class McpEnvironment { /** - * @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string} + * @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string,mcp_text_sanitization:bool} */ public static function load(string $envFile): array { @@ -24,6 +24,7 @@ final class McpEnvironment 'redmine_api_key' => $apiKey, 'mcp_server_token' => self::optionalString(getenv('MCP_SERVER_TOKEN') ?: ($env['MCP_SERVER_TOKEN'] ?? null)), 'mcp_debug_log' => self::optionalString(getenv('MCP_DEBUG_LOG') ?: ($env['MCP_DEBUG_LOG'] ?? null)), + /* compare against '' rather than using ?: so an exported "0" is not discarded as falsy before boolSetting sees it */ 'mcp_text_sanitization' => self::boolSetting((string) getenv('MCP_TEXT_SANITIZATION') !== '' ? getenv('MCP_TEXT_SANITIZATION') : ($env['MCP_TEXT_SANITIZATION'] ??
null), true), ]; } @@ -57,4 +58,25 @@ final class McpEnvironment return $value; } + + private static function boolSetting(mixed $value, bool $default): bool + { + if (!is_string($value)) { + return $default; + } + + $normalized = strtolower(trim($value)); + if ($normalized === '') { + return $default; + } + + if (in_array($normalized, ['1', 'true', 'yes', 'on'], true)) { + return true; + } + if (in_array($normalized, ['0', 'false', 'no', 'off'], true)) { + return false; + } + + return $default; + } } diff --git a/redMCP/app/mcp-http-router.php b/redMCP/app/mcp-http-router.php index a78d393..7ac7e36 100644 --- a/redMCP/app/mcp-http-router.php +++ b/redMCP/app/mcp-http-router.php @@ -22,7 +22,8 @@ if ($token === null) { $handler = new McpHttpHandler( new McpDispatcher( RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']), - new McpDebugLogger($env['mcp_debug_log']) + new McpDebugLogger($env['mcp_debug_log']), + $env['mcp_text_sanitization'] ), $token, getenv('MCP_HTTP_PATH') ?: '/mcp' diff --git a/redMCP/bin/redmcp-server.php b/redMCP/bin/redmcp-server.php index eb3e721..070668d 100755 --- a/redMCP/bin/redmcp-server.php +++ b/redMCP/bin/redmcp-server.php @@ -15,7 +15,8 @@ $env = McpEnvironment::load(__DIR__ . 
'/../.env'); $server = new McpStdioServer( new McpDispatcher( RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']), - new McpDebugLogger($env['mcp_debug_log']) + new McpDebugLogger($env['mcp_debug_log']), + $env['mcp_text_sanitization'] ) ); $server->run(); diff --git a/redMCP/bin/test-redmine-structure.php b/redMCP/bin/test-redmine-structure.php index 1cd2ff5..370472a 100755 --- a/redMCP/bin/test-redmine-structure.php +++ b/redMCP/bin/test-redmine-structure.php @@ -78,6 +78,8 @@ final class RedmineStructureTest $this->testMcpFindProjectRecommendsExactIdentifier(); $this->testMcpFindProjectRecommendsExactName(); $this->testMcpFindProjectLeavesAmbiguousMatchesUnrecommended(); + $this->testMcpSearchSanitizesNoisyTextFields(); + $this->testMcpSearchCanDisableTextSanitization(); $this->testCreateRelationDefaultsToRelatesAndRequiresTarget(); $this->testAttachmentUploadSupportsPathAndBase64(); $this->testAttachmentUploadAcceptsPdfDataUrl(); @@ -239,6 +241,49 @@ final class RedmineStructureTest $this->assertSame('quality-archive', $result['matches'][1]['identifier'], 'second ambiguous match is returned'); } + private function testMcpSearchSanitizesNoisyTextFields(): void + { + $http = new RecordingClient(); + $http->queueJson([ + 'results' => [[ + 'title' => 'Ticket result', + 'description' => "Caf\u{00E9}\u{200B} issue\x07 !!!!!!!!!!\n\n\n\nDone", + 'notes' => "Agent\u{FEFF} note\x1F........", + ]], + ]); + $dispatcher = new McpDispatcher(new RedmineClient($http)); + + $result = $this->callToolJson($dispatcher, 'redmine_search', ['query' => 'ticket']); + $description = (string) $result['results'][0]['description']; + $notes = (string) $result['results'][0]['notes']; + + $this->assertStringContains('Café issue', $description, 'sanitizer preserves readable unicode content'); + $this->assertNotStringContains("\x07", $description, 'sanitizer removes control characters from description'); + $this->assertNotStringContains("\u{200B}", $description, 
'sanitizer removes zero-width characters from description'); + $this->assertNotStringContains('!!!!!!!!!!', $description, 'sanitizer caps excessive repeated punctuation in description'); + $this->assertNotStringContains("\n\n\n\n", $description, 'sanitizer caps excessive blank lines in description'); + $this->assertNotStringContains("\x1F", $notes, 'sanitizer removes control characters from notes'); + $this->assertNotStringContains('.......', $notes, 'sanitizer caps excessive repeated punctuation in notes'); /* 7 dots: the fixture has 8 and the capped output has 6, so a longer needle could never match */ + } + + private function testMcpSearchCanDisableTextSanitization(): void + { + $http = new RecordingClient(); + $http->queueJson([ + 'results' => [[ + 'description' => "Raw\u{200B} text\x07 !!!!!!!!!!", + ]], + ]); + $dispatcher = new McpDispatcher(new RedmineClient($http), null, false); + + $result = $this->callToolJson($dispatcher, 'redmine_search', ['query' => 'ticket']); + $description = (string) $result['results'][0]['description']; + + $this->assertStringContains("\u{200B}", $description, 'sanitization toggle off keeps zero-width characters untouched'); + $this->assertStringContains("\x07", $description, 'sanitization toggle off keeps control characters untouched'); + $this->assertStringContains('!!!!!!!!!!', $description, 'sanitization toggle off keeps repeated punctuation untouched'); + } + private function testCreateRelationDefaultsToRelatesAndRequiresTarget(): void { $http = new RecordingClient(); @@ -500,6 +545,17 @@ final class RedmineStructureTest exit(1); } + private function assertNotStringContains(string $needle, string $haystack, string $message): void + { + $this->assertions++; + if (strpos($haystack, $needle) === false) { + return; + } + + fwrite(STDERR, "FAIL: {$message}\nUnexpected needle: {$needle}\nHaystack: {$haystack}\n"); + exit(1); + } + /** * @param array $haystack */ diff --git a/skills/redmine-communicator/references/redmcp-tools.md b/skills/redmine-communicator/references/redmcp-tools.md index 9a73393..f22a8a4 100644 ---
a/skills/redmine-communicator/references/redmcp-tools.md +++ b/skills/redmine-communicator/references/redmcp-tools.md @@ -10,6 +10,7 @@ Required environment: ```text REDMINE_URL=http://redmine.example.test REDMINE_API_KEY=... +MCP_TEXT_SANITIZATION=true ``` For Streamable HTTP MCP: