Sanitize noisy MCP text fields by default
Clean control and invisible junk from tool result text fields to reduce token waste while preserving readable Unicode. Add an MCP_TEXT_SANITIZATION toggle and regression tests for enabled and disabled behavior.
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
REDMINE_URL=http://192.168.50.170
|
||||
REDMINE_API_KEY=
|
||||
MCP_TEXT_SANITIZATION=true
|
||||
|
||||
@@ -298,6 +298,11 @@ and IDs. Authorization headers, bearer tokens, and Redmine API keys are not
|
||||
logged. MCP tool output also redacts credential fields returned by Redmine, such
|
||||
as `api_key`.
|
||||
|
||||
Tool output text sanitization is enabled by default to reduce token waste from
|
||||
invisible/control junk in fetched issue text. This cleanup preserves readable
|
||||
Unicode and targets fields such as `description`, `notes`, `content`, and
|
||||
message body text. Set `MCP_TEXT_SANITIZATION=false` to disable it.
|
||||
|
||||
Example stdio client configuration:
|
||||
|
||||
```json
|
||||
|
||||
@@ -37,11 +37,13 @@ final class McpDispatcher
|
||||
|
||||
private RedmineClient $redmine;
|
||||
private McpDebugLogger $logger;
|
||||
private bool $sanitizeToolText;
|
||||
|
||||
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null)
|
||||
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null, bool $sanitizeToolText = true)
|
||||
{
|
||||
$this->redmine = $redmine;
|
||||
$this->logger = $logger ?? new McpDebugLogger(null);
|
||||
$this->sanitizeToolText = $sanitizeToolText;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -471,7 +473,12 @@ final class McpDispatcher
|
||||
throw new RuntimeException('Unknown tool: ' . $name);
|
||||
}
|
||||
|
||||
$encoded = json_encode($this->redactSensitive($result), JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
$prepared = $this->redactSensitive($result);
|
||||
if ($this->sanitizeToolText) {
|
||||
$prepared = $this->sanitizeToolResult($prepared);
|
||||
}
|
||||
|
||||
$encoded = json_encode($prepared, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
if ($encoded === false) {
|
||||
throw new RuntimeException('Could not encode tool result.');
|
||||
}
|
||||
@@ -758,4 +765,67 @@ final class McpDispatcher
|
||||
'token',
|
||||
], true);
|
||||
}
|
||||
|
||||
/**
 * Recursively clean free-form text inside a tool result.
 *
 * Arrays are walked depth-first and rebuilt with the same keys. A string is
 * cleaned only when the key it is stored under is recognised by
 * shouldSanitizeTextKey(); every other value is returned unchanged.
 *
 * Integer (list) keys are passed down as an empty key, so strings that sit
 * directly inside a list are never cleaned, even when the list itself is
 * stored under a text key.
 *
 * @param mixed  $value Tool result, or a nested part of it.
 * @param string $key   Array key that $value was found under ('' when there is none).
 *
 * @return mixed The value with matching text fields cleaned.
 */
private function sanitizeToolResult($value, string $key = '')
{
    if (is_array($value)) {
        $cleaned = [];
        foreach ($value as $name => $item) {
            $itemKey = is_string($name) ? $name : '';
            $cleaned[$name] = $this->sanitizeToolResult($item, $itemKey);
        }

        return $cleaned;
    }

    if (is_string($value) && $this->shouldSanitizeTextKey($key)) {
        return $this->sanitizeText($value);
    }

    return $value;
}
|
||||
|
||||
/**
 * Tell whether strings stored under the given key count as free-form text.
 *
 * The key is trimmed and lower-cased before it is compared against the fixed
 * list of text field names; an empty key never matches.
 */
private function shouldSanitizeTextKey(string $key): bool
{
    $textFields = [
        'description' => true,
        'notes' => true,
        'content' => true,
        'body' => true,
        'text' => true,
        'message' => true,
        'message_body' => true,
        'message_text' => true,
        'plain_text' => true,
        'plain_body' => true,
        'html_body' => true,
    ];

    $candidate = strtolower(trim($key));

    return $candidate !== '' && isset($textFields[$candidate]);
}
|
||||
|
||||
/**
 * Strip invisible and repetitive noise from a single text value.
 *
 * Steps, applied in order:
 *  1. CRLF and lone CR line endings become LF.
 *  2. ASCII control characters other than tab and newline, plus DEL, are removed.
 *  3. Unicode format characters (category Cf, e.g. U+200B and U+FEFF) are removed.
 *  4. Runs of three or more non-newline whitespace characters collapse to one replacement string.
 *  5. Runs of four or more newlines are capped at three.
 *  6. A punctuation character repeated eight or more times is capped at six.
 *
 * If a regex step fails (preg_replace() returns null), that step is skipped
 * and the text from the previous step is carried forward.
 */
private function sanitizeText(string $value): string
{
    $text = str_replace(["\r\n", "\r"], "\n", $value);

    $rules = [
        ['/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/u', ''],
        ['/\p{Cf}+/u', ''],
        ['/[^\S\n]{3,}/u', ' '],
        ['/\n{4,}/u', "\n\n\n"],
        ['/([[:punct:]])\1{7,}/u', '$1$1$1$1$1$1'],
    ];

    foreach ($rules as [$pattern, $replacement]) {
        $text = preg_replace($pattern, $replacement, $text) ?? $text;
    }

    return $text;
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ use RuntimeException;
|
||||
final class McpEnvironment
|
||||
{
|
||||
/**
|
||||
* @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string}
|
||||
* @return array{redmine_url:string,redmine_api_key:string,mcp_server_token:?string,mcp_debug_log:?string,mcp_text_sanitization:bool}
|
||||
*/
|
||||
public static function load(string $envFile): array
|
||||
{
|
||||
@@ -24,6 +24,7 @@ final class McpEnvironment
|
||||
'redmine_api_key' => $apiKey,
|
||||
'mcp_server_token' => self::optionalString(getenv('MCP_SERVER_TOKEN') ?: ($env['MCP_SERVER_TOKEN'] ?? null)),
|
||||
'mcp_debug_log' => self::optionalString(getenv('MCP_DEBUG_LOG') ?: ($env['MCP_DEBUG_LOG'] ?? null)),
|
||||
'mcp_text_sanitization' => self::boolSetting(getenv('MCP_TEXT_SANITIZATION') ?: ($env['MCP_TEXT_SANITIZATION'] ?? null), true),
|
||||
];
|
||||
}
|
||||
|
||||
@@ -57,4 +58,25 @@ final class McpEnvironment
|
||||
|
||||
return $value;
|
||||
}
|
||||
|
||||
/**
 * Interpret a raw configuration value as a boolean switch.
 *
 * Recognised spellings are compared after trimming and lower-casing:
 * '1', 'true', 'yes', 'on' give true; '0', 'false', 'no', 'off' give false.
 * Non-string values, empty strings and unrecognised spellings give $default.
 *
 * NOTE(review): load() chooses the raw value with `?:`, so an environment
 * value of "0" is treated as unset there and never reaches this method as
 * '0' - confirm whether that is intended.
 *
 * @param mixed $value   Raw setting value, typically a string or null.
 * @param bool  $default Result used when the value is missing or not understood.
 */
private static function boolSetting(mixed $value, bool $default): bool
{
    if (!is_string($value)) {
        return $default;
    }

    return match (strtolower(trim($value))) {
        '1', 'true', 'yes', 'on' => true,
        '0', 'false', 'no', 'off' => false,
        default => $default,
    };
}
|
||||
}
|
||||
|
||||
@@ -22,7 +22,8 @@ if ($token === null) {
|
||||
$handler = new McpHttpHandler(
|
||||
new McpDispatcher(
|
||||
RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']),
|
||||
new McpDebugLogger($env['mcp_debug_log'])
|
||||
new McpDebugLogger($env['mcp_debug_log']),
|
||||
$env['mcp_text_sanitization']
|
||||
),
|
||||
$token,
|
||||
getenv('MCP_HTTP_PATH') ?: '/mcp'
|
||||
|
||||
@@ -15,7 +15,8 @@ $env = McpEnvironment::load(__DIR__ . '/../.env');
|
||||
$server = new McpStdioServer(
|
||||
new McpDispatcher(
|
||||
RedmineClient::fromCredentials($env['redmine_url'], $env['redmine_api_key']),
|
||||
new McpDebugLogger($env['mcp_debug_log'])
|
||||
new McpDebugLogger($env['mcp_debug_log']),
|
||||
$env['mcp_text_sanitization']
|
||||
)
|
||||
);
|
||||
$server->run();
|
||||
|
||||
@@ -78,6 +78,8 @@ final class RedmineStructureTest
|
||||
$this->testMcpFindProjectRecommendsExactIdentifier();
|
||||
$this->testMcpFindProjectRecommendsExactName();
|
||||
$this->testMcpFindProjectLeavesAmbiguousMatchesUnrecommended();
|
||||
$this->testMcpSearchSanitizesNoisyTextFields();
|
||||
$this->testMcpSearchCanDisableTextSanitization();
|
||||
$this->testCreateRelationDefaultsToRelatesAndRequiresTarget();
|
||||
$this->testAttachmentUploadSupportsPathAndBase64();
|
||||
$this->testAttachmentUploadAcceptsPdfDataUrl();
|
||||
@@ -239,6 +241,49 @@ final class RedmineStructureTest
|
||||
$this->assertSame('quality-archive', $result['matches'][1]['identifier'], 'second ambiguous match is returned');
|
||||
}
|
||||
|
||||
/**
 * With sanitization left at its default (enabled), noisy `description` and
 * `notes` fields returned by a search come back cleaned while readable
 * Unicode text is kept.
 */
private function testMcpSearchSanitizesNoisyTextFields(): void
{
    $http = new RecordingClient();
    $http->queueJson([
        'results' => [[
            'title' => 'Ticket result',
            'description' => "Caf\u{00E9}\u{200B} issue\x07 !!!!!!!!!!\n\n\n\nDone",
            'notes' => "Agent\u{FEFF} note\x1F........",
        ]],
    ]);
    $dispatcher = new McpDispatcher(new RedmineClient($http));

    $result = $this->callToolJson($dispatcher, 'redmine_search', ['query' => 'ticket']);
    $description = (string) $result['results'][0]['description'];
    $notes = (string) $result['results'][0]['notes'];

    $this->assertStringContains('Café issue', $description, 'sanitizer preserves readable unicode content');
    $this->assertNotStringContains("\x07", $description, 'sanitizer removes control characters from description');
    $this->assertNotStringContains("\u{200B}", $description, 'sanitizer removes zero-width characters from description');
    $this->assertNotStringContains('!!!!!!!!!!', $description, 'sanitizer caps excessive repeated punctuation in description');
    $this->assertNotStringContains("\n\n\n\n", $description, 'sanitizer caps excessive blank lines in description');
    $this->assertNotStringContains("\x1F", $notes, 'sanitizer removes control characters from notes');
    $this->assertNotStringContains("\u{FEFF}", $notes, 'sanitizer removes byte-order-mark characters from notes');
    // The fixture holds eight dots and the sanitizer caps a run at six, so
    // seven in a row must be absent. A longer needle than the fixture's own
    // run would pass even with sanitization switched off.
    $this->assertNotStringContains(str_repeat('.', 7), $notes, 'sanitizer caps excessive repeated punctuation in notes');
}
|
||||
|
||||
/**
 * With the dispatcher's third constructor argument set to false, a noisy
 * `description` field comes back from a search with its zero-width
 * character, control character and repeated punctuation still present.
 */
private function testMcpSearchCanDisableTextSanitization(): void
{
    $transport = new RecordingClient();
    $transport->queueJson([
        'results' => [
            ['description' => "Raw\u{200B} text\x07 !!!!!!!!!!"],
        ],
    ]);

    $rawDispatcher = new McpDispatcher(new RedmineClient($transport), null, false);
    $payload = $this->callToolJson($rawDispatcher, 'redmine_search', ['query' => 'ticket']);
    $text = (string) $payload['results'][0]['description'];

    // Each pair is [fragment that must survive, failure message].
    $expectations = [
        ["\u{200B}", 'sanitization toggle off keeps zero-width characters untouched'],
        ["\x07", 'sanitization toggle off keeps control characters untouched'],
        ['!!!!!!!!!!', 'sanitization toggle off keeps repeated punctuation untouched'],
    ];

    foreach ($expectations as [$fragment, $failure]) {
        $this->assertStringContains($fragment, $text, $failure);
    }
}
|
||||
|
||||
private function testCreateRelationDefaultsToRelatesAndRequiresTarget(): void
|
||||
{
|
||||
$http = new RecordingClient();
|
||||
@@ -500,6 +545,17 @@ final class RedmineStructureTest
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/**
 * Assert that $needle does not occur in $haystack.
 *
 * Counts the assertion, and on failure writes the message, needle and
 * haystack to STDERR and ends the process with exit status 1.
 */
private function assertNotStringContains(string $needle, string $haystack, string $message): void
{
    $this->assertions++;

    $found = strpos($haystack, $needle) !== false;
    if ($found) {
        fwrite(STDERR, "FAIL: {$message}\nUnexpected needle: {$needle}\nHaystack: {$haystack}\n");
        exit(1);
    }
}
|
||||
|
||||
/**
|
||||
* @param array<int,string> $haystack
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user