presseportale/app/Services/PressRelease/PressReleaseHtmlSanitizer.php
2026-06-12 15:24:20 +00:00

91 lines
2.7 KiB
PHP

<?php
namespace App\Services\PressRelease;
use Illuminate\Support\HtmlString;
use Mews\Purifier\Facades\Purifier;
/**
* Sanitizes press-release HTML coming out of the Flux/Tiptap editor.
*
* - On save: clean() strips everything outside the press_release allowlist.
* - On display: render() returns a safe HtmlString. Legacy plain-text content
* (PMs imported before Phase 7) is detected heuristically and wrapped in
* <p>/<br> so it renders consistently next to new HTML content.
*/
class PressReleaseHtmlSanitizer
{
    /** Name of the Purifier configuration profile passed to Purifier::clean(). */
    private const string PURIFIER_PROFILE = 'press_release';

    /**
     * Opening tags whose presence marks stored content as editor HTML
     * rather than legacy plain text.
     */
    private const string HTML_TAG_PATTERN = '/<(p|br|h2|h3|strong|em|ul|ol|li|blockquote|a)\b[^>]*>/i';

    public function __construct(private readonly PressReleaseLinkPolicy $linkPolicy) {}

    /**
     * Sanitize HTML before persisting to the database.
     *
     * @param  string|null  $html  Raw editor output.
     * @return string Purified HTML, or '' when the input is null or whitespace-only.
     */
    public function clean(?string $html): string
    {
        if ($html === null || trim($html) === '') {
            return '';
        }

        return (string) Purifier::clean($html, self::PURIFIER_PROFILE);
    }

    /**
     * Detect whether the stored text is already HTML (Phase 7+) or
     * legacy plain text from older imports.
     *
     * This is a heuristic: it only looks for an opening tag from a fixed
     * list, so text using none of those tags is treated as plain text.
     *
     * @param  string|null  $text  Stored press-release body.
     * @return bool True when at least one of the known opening tags is found.
     */
    public function isHtml(?string $text): bool
    {
        if ($text === null || $text === '') {
            return false;
        }

        // preg_match() yields 1 on a hit, 0 on a miss and false on error.
        return preg_match(self::HTML_TAG_PATTERN, $text) === 1;
    }

    /**
     * Produce a display-ready, safe HtmlString.
     *
     * The link policy (rel set by the system: external sponsored/nofollow,
     * portal-internal follow) is applied here at render time, so rule
     * changes take effect retroactively on all stored content.
     *
     * Legacy plain text is escaped, single line breaks become <br> and
     * blank lines separate <p> paragraphs.
     *
     * @param  string|null  $text  Stored press-release body (HTML or legacy plain text).
     * @return HtmlString Empty for null/blank input.
     */
    public function render(?string $text): HtmlString
    {
        if ($text === null || trim($text) === '') {
            return new HtmlString('');
        }

        if ($this->isHtml($text)) {
            // Re-sanitize on output, then let the link policy rewrite the anchors.
            return new HtmlString($this->linkPolicy->apply($this->clean($text)));
        }

        // Trim first so a stray leading/trailing newline does not end up as a
        // dangling <br> inside the first or last paragraph.
        $escaped = e(trim($text));
        $withBreaks = nl2br($escaped, false);

        // Two or more consecutive breaks (a blank line) separate paragraphs.
        // preg_split() returns false on failure; fall back to a single chunk.
        $paragraphs = preg_split('/(?:<br\s*\/?>\s*){2,}/i', $withBreaks) ?: [$withBreaks];

        $html = collect($paragraphs)
            ->map(static fn (string $chunk): string => trim($chunk))
            // Compare against '' explicitly: a bare filter() tests truthiness
            // and would silently drop a paragraph that consists only of "0".
            ->filter(static fn (string $chunk): bool => $chunk !== '')
            ->map(static fn (string $chunk): string => '<p>'.$chunk.'</p>')
            ->implode('');

        return new HtmlString($html);
    }

    /**
     * Plain-text length for character counters (without HTML noise).
     *
     * Tags are stripped, entities are decoded and every whitespace run is
     * collapsed to a single space before counting characters.
     *
     * NOTE(review): strip_tags() inserts no separator, so text from adjacent
     * block elements is joined directly ("<p>a</p><p>b</p>" counts as 2).
     * Confirm this matches the editor-side counter.
     *
     * @param  string|null  $text  Stored press-release body.
     * @return int Number of characters; 0 for null or empty input.
     */
    public function plainTextLength(?string $text): int
    {
        if ($text === null || $text === '') {
            return 0;
        }

        $stripped = strip_tags($text);
        $decoded = html_entity_decode($stripped, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        // With the /u modifier preg_replace() returns null on malformed UTF-8.
        // Fall back to the un-collapsed text instead of reporting a length of
        // 0 for content that is clearly not empty.
        $collapsed = preg_replace('/\s+/u', ' ', $decoded) ?? $decoded;

        return mb_strlen(trim($collapsed));
    }
}