Sanitize noisy MCP text fields by default

Clean control and invisible junk from tool result text fields to reduce token waste while preserving readable Unicode. Add an MCP_TEXT_SANITIZATION toggle and regression tests for enabled and disabled behavior.
This commit is contained in:
Jason Thistlethwaite
2026-05-06 02:31:25 -04:00
parent def9084981
commit 22c8e915e9
8 changed files with 162 additions and 5 deletions
+72 -2
View File
@@ -37,11 +37,13 @@ final class McpDispatcher
private RedmineClient $redmine;
private McpDebugLogger $logger;
private bool $sanitizeToolText;
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null)
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null, bool $sanitizeToolText = true)
{
$this->redmine = $redmine;
$this->logger = $logger ?? new McpDebugLogger(null);
$this->sanitizeToolText = $sanitizeToolText;
}
/**
@@ -471,7 +473,12 @@ final class McpDispatcher
throw new RuntimeException('Unknown tool: ' . $name);
}
$encoded = json_encode($this->redactSensitive($result), JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
$prepared = $this->redactSensitive($result);
if ($this->sanitizeToolText) {
$prepared = $this->sanitizeToolResult($prepared);
}
$encoded = json_encode($prepared, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
if ($encoded === false) {
throw new RuntimeException('Could not encode tool result.');
}
@@ -758,4 +765,67 @@ final class McpDispatcher
'token',
], true);
}
/**
* @param mixed $value
*
* @return mixed
*/
private function sanitizeToolResult($value, string $key = '')
{
if (is_string($value)) {
if (!$this->shouldSanitizeTextKey($key)) {
return $value;
}
return $this->sanitizeText($value);
}
if (!is_array($value)) {
return $value;
}
$sanitized = [];
foreach ($value as $childKey => $childValue) {
$sanitized[$childKey] = $this->sanitizeToolResult(
$childValue,
is_string($childKey) ? $childKey : ''
);
}
return $sanitized;
}
private function shouldSanitizeTextKey(string $key): bool
{
$normalized = strtolower(trim($key));
if ($normalized === '') {
return false;
}
return in_array($normalized, [
'description',
'notes',
'content',
'body',
'text',
'message',
'message_body',
'message_text',
'plain_text',
'plain_body',
'html_body',
], true);
}
private function sanitizeText(string $value): string
{
$value = str_replace(["\r\n", "\r"], "\n", $value);
$value = preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/u', '', $value) ?? $value;
$value = preg_replace('/\p{Cf}+/u', '', $value) ?? $value;
$value = preg_replace('/[^\S\n]{3,}/u', ' ', $value) ?? $value;
$value = preg_replace('/\n{4,}/u', "\n\n\n", $value) ?? $value;
$value = preg_replace('/([[:punct:]])\1{7,}/u', '$1$1$1$1$1$1', $value) ?? $value;
return $value;
}
}