b2in/database/scripts/build-b2in-en-from-de.php
2026-04-10 17:18:17 +02:00

116 lines
2.9 KiB
PHP

<?php
/**
* Einmaliges Skript: Liest deutschsprachige Sektionen aus database/data/b2in-en/*.json,
* übersetzt String-Werte per MyMemory (de→en) und schreibt die Dateien zurück.
*
* Ausführung: php database/scripts/build-b2in-en-from-de.php
*
* Hinweis: MyMemory hat ein Kontingent; bei Fehlern bleibt der Originaltext erhalten.
*/
declare(strict_types=1);
// Directory containing the JSON section files that are translated in place.
$dir = dirname(__DIR__).'/data/b2in-en';
// On-disk translation cache, so repeated runs do not spend API quota on known strings.
$cacheFile = dirname(__DIR__).'/../storage/app/b2in-en-translation-cache.json';
// The cache is regenerable: an unreadable, empty or malformed cache file must not
// abort the run, so fall back to an empty cache and report the problem instead.
$cache = [];
if (is_file($cacheFile)) {
    try {
        $decodedCache = json_decode((string) file_get_contents($cacheFile), true, 512, JSON_THROW_ON_ERROR);
        // Valid JSON that is not an object/array (e.g. `null`) cannot serve as a cache.
        if (is_array($decodedCache)) {
            $cache = $decodedCache;
        } else {
            fwrite(STDERR, "Ignoring translation cache (not a JSON object): {$cacheFile}\n");
        }
    } catch (JsonException $e) {
        fwrite(STDERR, "Ignoring unreadable translation cache {$cacheFile}: {$e->getMessage()}\n");
    }
}
/**
 * Decides whether a string should be left untranslated.
 *
 * The input is trimmed first. It is skipped (true) when the trimmed value is:
 *  - empty,
 *  - made up only of digits, whitespace and the listed punctuation/currency signs,
 *  - prefixed with http:// or https:// (case-insensitive),
 *  - starting with a slash and containing no space (path-like),
 *  - shaped like an e-mail address.
 *
 * @param string $s Raw string value.
 *
 * @return bool True when the string should not be sent for translation.
 */
function shouldSkipString(string $s): bool
{
    $candidate = trim($s);

    // Checks run in the same order as a chain of guard clauses; `||` short-circuits
    // on the first match.
    return $candidate === ''
        || preg_match('/^[\d\s.,:%€$+\-–—]+$/u', $candidate) === 1
        || preg_match('#^https?://#i', $candidate) === 1
        || (str_starts_with($candidate, '/') && ! str_contains($candidate, ' '))
        || preg_match('/^[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}$/i', $candidate) === 1;
}
/**
 * Translates one string from German to English via the MyMemory HTTP endpoint.
 *
 * Strings rejected by shouldSkipString() are returned unchanged without a request.
 * Successful translations are stored in $cache (keyed by the source text) and
 * reused on later calls. On any failure (transport error, undecodable response,
 * non-200 response status, missing or empty translation) the original text is
 * returned and nothing is cached.
 *
 * @param string               $text  Source text.
 * @param array<string, mixed> $cache Translation cache, updated in place. It is loaded
 *                                    from disk, so entries are only trusted when they
 *                                    are strings.
 *
 * @return string The translated text, or $text unchanged when skipped or on failure.
 */
function translate(string $text, array &$cache): string
{
    if (shouldSkipString($text)) {
        return $text;
    }

    // A hand-edited or corrupted cache file may hold non-string values; returning
    // those would raise a TypeError under strict_types, so re-translate instead.
    $cached = $cache[$text] ?? null;
    if (is_string($cached)) {
        return $cached;
    }

    $body = http_build_query([
        'q' => $text,
        'langpair' => 'de|en',
    ]);
    $ctx = stream_context_create([
        'http' => [
            'method' => 'POST',
            'timeout' => 60,
            'header' => "Content-Type: application/x-www-form-urlencoded\r\nUser-Agent: B2inCmsBuild/1.0\r\n",
            'content' => $body,
        ],
    ]);

    // Best effort by design: warnings are suppressed and a failure keeps the original text.
    $raw = @file_get_contents('https://api.mymemory.translated.net/get', false, $ctx);

    // Throttle after every request, successful or not, so a run of failures does
    // not hit the API back-to-back.
    usleep(150000);

    if ($raw === false) {
        return $text;
    }

    $data = json_decode($raw, true);
    if (! is_array($data)) {
        return $text;
    }

    // Cast before comparing so a numeric-string status ("200") is not mistaken for an error.
    if ((int) ($data['responseStatus'] ?? 200) !== 200) {
        return $text;
    }

    $out = $data['responseData']['translatedText'] ?? null;
    if (! is_string($out) || $out === '') {
        return $text;
    }

    $cache[$text] = $out;

    return $out;
}
/**
 * Recursively applies a callback to every string inside a nested value.
 *
 * Strings are replaced by the callback's result, arrays are rebuilt with the
 * same keys in the same order, and every other value is returned as is.
 *
 * @param mixed    $v  Value to traverse (scalar, null or nested array).
 * @param callable $fn Callback invoked with each string.
 *
 * @return mixed The transformed copy of $v.
 */
function walk(mixed $v, callable $fn): mixed
{
    return match (true) {
        is_string($v) => $fn($v),
        // array_map() with a single array preserves keys and their order.
        is_array($v) => array_map(
            static fn (mixed $item): mixed => walk($item, $fn),
            $v,
        ),
        default => $v,
    };
}
// Adapter so walk() can call translate() with the shared cache captured by reference.
$translateFn = function (string $s) use (&$cache): string {
    return translate($s, $cache);
};

// JSON_THROW_ON_ERROR matters here: without it json_encode() returns false on
// failure, and writing that result would replace the target file with an empty payload.
$encodeFlags = JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR;

try {
    foreach (glob($dir.'/*.json') ?: [] as $path) {
        $name = basename($path);
        $data = json_decode((string) file_get_contents($path), true, 512, JSON_THROW_ON_ERROR);
        $out = walk($data, $translateFn);

        // Encode first; the file is only touched once a complete payload exists.
        $encoded = json_encode($out, $encodeFlags | JSON_UNESCAPED_SLASHES);
        if (file_put_contents($path, $encoded."\n") === false) {
            fwrite(STDERR, "Could not write {$name}\n");

            continue;
        }
        echo "Updated {$name}\n";
    }
} finally {
    // Persist the cache even when the loop aborts, so translations already fetched
    // are not requested again on the next run.
    file_put_contents($cacheFile, json_encode($cache, $encodeFlags));
}

echo 'Done. Cache: '.$cacheFile."\n";