Sanitize noisy MCP text fields by default

Clean control and invisible junk from tool result text fields to reduce token waste while preserving readable Unicode. Add an MCP_TEXT_SANITIZATION toggle and regression tests for enabled and disabled behavior.
This commit is contained in:
Jason Thistlethwaite
2026-05-06 02:31:25 -04:00
parent def9084981
commit 22c8e915e9
8 changed files with 162 additions and 5 deletions
+2 -1
View File
@@ -15,7 +15,8 @@ $env = McpEnvironment::load(__DIR__ . '/../.env');
// Build the stdio server around a dispatcher assembled from the loaded $env array,
// then start it.
$server = new McpStdioServer(
new McpDispatcher(
// Redmine client created from the URL and API key entries of $env.
RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']),
// NOTE(review): the next two lines are the before/after forms of the same
// argument as rendered by the diff view (the second adds the trailing comma).
new McpDebugLogger($env['mcp_debug_log'])
new McpDebugLogger($env['mcp_debug_log']),
// Third dispatcher argument: the 'mcp_text_sanitization' entry of $env.
$env['mcp_text_sanitization']
)
);
$server->run();
+56
View File
@@ -78,6 +78,8 @@ final class RedmineStructureTest
$this->testMcpFindProjectRecommendsExactIdentifier();
$this->testMcpFindProjectRecommendsExactName();
$this->testMcpFindProjectLeavesAmbiguousMatchesUnrecommended();
$this->testMcpSearchSanitizesNoisyTextFields();
$this->testMcpSearchCanDisableTextSanitization();
$this->testCreateRelationDefaultsToRelatesAndRequiresTarget();
$this->testAttachmentUploadSupportsPathAndBase64();
$this->testAttachmentUploadAcceptsPdfDataUrl();
@@ -239,6 +241,49 @@ final class RedmineStructureTest
$this->assertSame('quality-archive', $result['matches'][1]['identifier'], 'second ambiguous match is returned');
}
/**
 * Checks that redmine_search text fields come back cleaned when the
 * dispatcher is constructed with its default arguments.
 *
 * Every noisy run is written once and used both to build the queued payload
 * and as the assertion needle, so a needle can never be longer than the run
 * that was actually queued (which would make the assertion pass vacuously).
 */
private function testMcpSearchSanitizesNoisyTextFields(): void
{
    // Shared fixtures: the same value feeds the input and the matching assertion.
    $bangRun = '!!!!!!!!!!';
    $dotRun = '........';
    $blankLineRun = "\n\n\n\n";

    $http = new RecordingClient();
    $http->queueJson([
        'results' => [[
            'title' => 'Ticket result',
            'description' => "Caf\u{00E9}\u{200B} issue\x07 {$bangRun}{$blankLineRun}Done",
            'notes' => "Agent\u{FEFF} note\x1F{$dotRun}",
        ]],
    ]);

    $dispatcher = new McpDispatcher(new RedmineClient($http));
    $result = $this->callToolJson($dispatcher, 'redmine_search', ['query' => 'ticket']);
    $description = (string) $result['results'][0]['description'];
    $notes = (string) $result['results'][0]['notes'];

    // Readable text must survive once the zero-width space between "Café" and " issue" is gone.
    $this->assertStringContains('Café issue', $description, 'sanitizer preserves readable unicode content');
    $this->assertNotStringContains("\x07", $description, 'sanitizer removes control characters from description');
    $this->assertNotStringContains("\u{200B}", $description, 'sanitizer removes zero-width characters from description');
    $this->assertNotStringContains($bangRun, $description, 'sanitizer caps excessive repeated punctuation in description');
    $this->assertNotStringContains($blankLineRun, $description, 'sanitizer caps excessive blank lines in description');
    $this->assertNotStringContains("\x1F", $notes, 'sanitizer removes control characters from notes');
    // The needle is exactly the queued dot run; the previous literal was one dot
    // longer than the input and therefore could never be found.
    $this->assertNotStringContains($dotRun, $notes, 'sanitizer caps excessive repeated punctuation in notes');
}
/**
 * Checks that a dispatcher built with `false` as its third constructor
 * argument returns the redmine_search description with its zero-width
 * character, control character and repeated punctuation still present.
 */
private function testMcpSearchCanDisableTextSanitization(): void
{
    $recorder = new RecordingClient();
    $payload = [
        'results' => [[
            'description' => "Raw\u{200B} text\x07 !!!!!!!!!!",
        ]],
    ];
    $recorder->queueJson($payload);

    $rawDispatcher = new McpDispatcher(new RedmineClient($recorder), null, false);
    $response = $this->callToolJson($rawDispatcher, 'redmine_search', ['query' => 'ticket']);
    $firstHit = $response['results'][0];
    $text = (string) $firstHit['description'];

    // Each pair is [needle that must survive, failure message], checked in order.
    $survivors = [
        ["\u{200B}", 'sanitization toggle off keeps zero-width characters untouched'],
        ["\x07", 'sanitization toggle off keeps control characters untouched'],
        ['!!!!!!!!!!', 'sanitization toggle off keeps repeated punctuation untouched'],
    ];
    foreach ($survivors as [$needle, $failureMessage]) {
        $this->assertStringContains($needle, $text, $failureMessage);
    }
}
private function testCreateRelationDefaultsToRelatesAndRequiresTarget(): void
{
$http = new RecordingClient();
@@ -500,6 +545,17 @@ final class RedmineStructureTest
exit(1);
}
/**
 * Asserts that $needle does not occur anywhere in $haystack.
 *
 * Counts the assertion, and on failure writes a diagnostic to STDERR and
 * terminates the process with exit status 1.
 *
 * @param string $needle   Substring that must be absent.
 * @param string $haystack Text to inspect.
 * @param string $message  Description printed when the assertion fails.
 */
private function assertNotStringContains(string $needle, string $haystack, string $message): void
{
    $this->assertions += 1;

    $needleFound = strpos($haystack, $needle) !== false;
    if ($needleFound) {
        fwrite(STDERR, "FAIL: {$message}\nUnexpected needle: {$needle}\nHaystack: {$haystack}\n");
        exit(1);
    }
}
/**
* @param array<int,string> $haystack
*/