Sanitize noisy MCP text fields by default
Clean control and invisible junk from tool result text fields to reduce token waste while preserving readable Unicode. Add an MCP_TEXT_SANITIZATION toggle and regression tests for enabled and disabled behavior.
This commit is contained in:
@@ -37,11 +37,13 @@ final class McpDispatcher
|
||||
|
||||
private RedmineClient $redmine;
|
||||
private McpDebugLogger $logger;
|
||||
private bool $sanitizeToolText;
|
||||
|
||||
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null)
|
||||
public function __construct(RedmineClient $redmine, ?McpDebugLogger $logger = null, bool $sanitizeToolText = true)
{
    // No logger supplied: fall back to a McpDebugLogger constructed with null.
    if ($logger === null) {
        $logger = new McpDebugLogger(null);
    }

    // Flag consulted before tool results are encoded; true by default.
    $this->sanitizeToolText = $sanitizeToolText;
    $this->logger = $logger;
    $this->redmine = $redmine;
}
|
||||
|
||||
/**
|
||||
@@ -471,7 +473,12 @@ final class McpDispatcher
|
||||
throw new RuntimeException('Unknown tool: ' . $name);
|
||||
}
|
||||
|
||||
$encoded = json_encode($this->redactSensitive($result), JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
$prepared = $this->redactSensitive($result);
|
||||
if ($this->sanitizeToolText) {
|
||||
$prepared = $this->sanitizeToolResult($prepared);
|
||||
}
|
||||
|
||||
$encoded = json_encode($prepared, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
|
||||
if ($encoded === false) {
|
||||
throw new RuntimeException('Could not encode tool result.');
|
||||
}
|
||||
@@ -758,4 +765,67 @@ final class McpDispatcher
|
||||
'token',
|
||||
], true);
|
||||
}
|
||||
|
||||
/**
 * Recursively rebuilds a tool result, cleaning string values that sit under
 * a text-like key and returning everything else unchanged.
 *
 * Arrays are walked entry by entry with their keys preserved. Only string
 * keys are passed down to the recursive call; entries with integer keys are
 * visited with an empty key, so bare strings in a list are returned as-is.
 *
 * @param mixed  $value Value to inspect (array, string, or any other type).
 * @param string $key   Key under which $value was found; '' when there is none.
 *
 * @return mixed The value with eligible strings sanitized.
 */
private function sanitizeToolResult($value, string $key = '')
{
    if (is_array($value)) {
        $clean = [];
        foreach ($value as $name => $item) {
            $itemKey = is_string($name) ? $name : '';
            $clean[$name] = $this->sanitizeToolResult($item, $itemKey);
        }

        return $clean;
    }

    if (is_string($value) && $this->shouldSanitizeTextKey($key)) {
        return $this->sanitizeText($value);
    }

    // Non-string scalars, null, objects, and strings under other keys pass through.
    return $value;
}
|
||||
|
||||
/**
 * Tells whether a result key names a free-text field that should be cleaned.
 *
 * The key is trimmed and lower-cased before being compared against the
 * allow-list, so ' Notes ' matches 'notes'. An empty or whitespace-only key
 * never matches.
 *
 * @param string $key Key as it appears in the tool result.
 *
 * @return bool True when the key is on the allow-list.
 */
private function shouldSanitizeTextKey(string $key): bool
{
    // Keyed map so membership is a single isset() lookup; '' is not a key here.
    $textKeys = [
        'description' => true,
        'notes' => true,
        'content' => true,
        'body' => true,
        'text' => true,
        'message' => true,
        'message_body' => true,
        'message_text' => true,
        'plain_text' => true,
        'plain_body' => true,
        'html_body' => true,
    ];

    $lookup = strtolower(trim($key));

    return isset($textKeys[$lookup]);
}
|
||||
|
||||
/**
 * Strips control and invisible characters from a text value and trims
 * excessive runs of whitespace, newlines, and punctuation.
 *
 * Steps, applied in order:
 *  1. CRLF and lone CR become LF.
 *  2. ASCII control characters other than tab and LF are removed, as is DEL.
 *  3. Unicode format characters (category Cf) are removed.
 *  4. Three or more consecutive non-newline whitespace characters become one space.
 *  5. Four or more consecutive newlines become three.
 *  6. A punctuation character repeated eight or more times is cut to six.
 *
 * If a regex step fails (preg_replace returns null), that step is skipped
 * and the text from the previous step is kept.
 *
 * @param string $value Raw text.
 *
 * @return string Cleaned text.
 */
private function sanitizeText(string $value): string
{
    $text = str_replace(["\r\n", "\r"], "\n", $value);

    // Ordered pattern => replacement table; the order is significant.
    $steps = [
        '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/u' => '',
        '/\p{Cf}+/u' => '',
        '/[^\S\n]{3,}/u' => ' ',
        '/\n{4,}/u' => "\n\n\n",
        '/([[:punct:]])\1{7,}/u' => '$1$1$1$1$1$1',
    ];

    foreach ($steps as $pattern => $replacement) {
        $replaced = preg_replace($pattern, $replacement, $text);
        if ($replaced !== null) {
            $text = $replaced;
        }
    }

    return $text;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user