116 lines
2.9 KiB
PHP
116 lines
2.9 KiB
PHP
<?php
|
|
|
|
/**
|
|
* Einmaliges Skript: Liest deutschsprachige Sektionen aus database/data/b2in-en/*.json,
|
|
* übersetzt String-Werte per MyMemory (de→en) und schreibt die Dateien zurück.
|
|
*
|
|
* Ausführung: php database/scripts/build-b2in-en-from-de.php
|
|
*
|
|
* Hinweis: MyMemory hat ein Kontingent; bei Fehlern bleibt der Originaltext erhalten.
|
|
*/
|
|
|
|
declare(strict_types=1);
|
|
|
|
// Directory containing the JSON section files that get translated in place.
$dir = dirname(__DIR__).'/data/b2in-en';
// JSON file mapping source strings to their translations, reused between runs.
$cacheFile = dirname(__DIR__).'/../storage/app/b2in-en-translation-cache.json';

// Load the translation cache. Malformed JSON still throws (JSON_THROW_ON_ERROR), but valid
// JSON that is not an object/array (e.g. `null` or a bare string) is discarded: translate()
// requires an array and would otherwise fail with a TypeError on its first call.
$cache = [];
if (is_file($cacheFile)) {
    $decodedCache = json_decode((string) file_get_contents($cacheFile), true, 512, JSON_THROW_ON_ERROR);
    if (is_array($decodedCache)) {
        $cache = $decodedCache;
    } else {
        fwrite(STDERR, "Ignoring translation cache (not a JSON object): {$cacheFile}\n");
    }
}
|
|
|
|
/**
 * Decides whether a string should be left untranslated.
 *
 * After trimming, a string is skipped when it is empty, consists only of digits, whitespace
 * and numeric punctuation/currency signs, is an http(s) URL, is a space-free path starting
 * with "/", or looks like an e-mail address.
 *
 * @param string $s Raw string value taken from the JSON data.
 *
 * @return bool True when the string must be kept as-is, false when it should be translated.
 */
function shouldSkipString(string $s): bool
{
    $candidate = trim($s);

    return match (true) {
        // Nothing to translate.
        $candidate === '' => true,
        // Numbers, prices, percentages, ranges.
        preg_match('/^[\d\s.,:%€$+\-–—]+$/u', $candidate) === 1 => true,
        // Absolute URLs.
        preg_match('#^https?://#i', $candidate) === 1 => true,
        // Site-relative paths (no spaces).
        str_starts_with($candidate, '/') && ! str_contains($candidate, ' ') => true,
        // E-mail addresses.
        preg_match('/^[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}$/i', $candidate) === 1 => true,
        default => false,
    };
}
|
|
|
|
/**
 * Translates one string from German to English through the MyMemory HTTP API.
 *
 * Strings rejected by shouldSkipString() are returned untouched without a request, and
 * strings already present in $cache are served from there. Every failure (transport error,
 * unparsable response, non-200 status, empty or warning payload) is deliberately
 * non-fatal: the original text is returned and nothing is cached.
 *
 * @param string               $text  Source text.
 * @param array<string, mixed> $cache Map of source text to translation; successful
 *                                    translations are added to it in place.
 *
 * @return string The translated text, or $text unchanged when no translation was obtained.
 */
function translate(string $text, array &$cache): string
{
    if (shouldSkipString($text)) {
        return $text;
    }

    // The cache is loaded from a JSON file; only trust entries that really are strings so a
    // damaged entry cannot violate the string return type under strict_types.
    $cached = $cache[$text] ?? null;
    if (is_string($cached)) {
        return $cached;
    }

    $body = http_build_query([
        'q' => $text,
        'langpair' => 'de|en',
    ]);

    $ctx = stream_context_create([
        'http' => [
            'method' => 'POST',
            'timeout' => 60,
            'header' => "Content-Type: application/x-www-form-urlencoded\r\nUser-Agent: B2inCmsBuild/1.0\r\n",
            'content' => $body,
        ],
    ]);

    // Warnings are suppressed on purpose: a failed request is an expected, handled outcome.
    $raw = @file_get_contents('https://api.mymemory.translated.net/get', false, $ctx);

    // Throttle after every request (0.15 s), including failed ones, so a failing or
    // quota-limited API is not hit in a tight loop for all remaining strings.
    usleep(150000);

    if ($raw === false) {
        return $text;
    }

    $data = json_decode($raw, true);
    if (! is_array($data)) {
        return $text;
    }

    // A missing status counts as success; a numeric string such as "200" is accepted too,
    // since the status may not always be encoded as an integer.
    $status = $data['responseStatus'] ?? 200;
    if (! is_numeric($status) || (int) $status !== 200) {
        return $text;
    }

    $out = $data['responseData']['translatedText'] ?? null;
    if (! is_string($out) || $out === '') {
        return $text;
    }

    // NOTE(review): MyMemory is known to report quota exhaustion as a "MYMEMORY WARNING: ..."
    // message in translatedText; never store such a message as a translation.
    if (str_starts_with($out, 'MYMEMORY WARNING')) {
        return $text;
    }

    $cache[$text] = $out;

    return $out;
}
|
|
|
|
/**
 * Recursively applies a callback to every string inside a nested value.
 *
 * Strings are replaced by the callback's result, arrays are rebuilt with the same keys and
 * recursively processed values, and every other value is returned unchanged.
 *
 * @param mixed    $v  Value to process (typically decoded JSON).
 * @param callable $fn Callback receiving a string and returning its replacement.
 *
 * @return mixed The processed value with the same structure as $v.
 */
function walk(mixed $v, callable $fn): mixed
{
    return match (true) {
        is_string($v) => $fn($v),
        // array_map() keeps the keys when given a single array.
        is_array($v) => array_map(
            static fn (mixed $child): mixed => walk($child, $fn),
            $v,
        ),
        default => $v,
    };
}
|
|
|
|
// Adapter so walk() can call translate() with the shared cache bound by reference.
$translateFn = static function (string $s) use (&$cache): string {
    return translate($s, $cache);
};

// Writes the translation cache to disk. Failures are reported on STDERR but never abort the
// run. Encoding with JSON_THROW_ON_ERROR ensures a failed encode cannot truncate the
// existing cache file to an empty (and therefore unparsable) file.
$saveCache = static function () use (&$cache, $cacheFile): void {
    try {
        $json = json_encode($cache, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR);
    } catch (JsonException $e) {
        fwrite(STDERR, "Cache not saved, encoding failed: {$e->getMessage()}\n");

        return;
    }

    if (file_put_contents($cacheFile, $json) === false) {
        fwrite(STDERR, "Cache not saved, cannot write {$cacheFile}\n");
    }
};

foreach (glob($dir.'/*.json') ?: [] as $path) {
    $name = basename($path);
    $data = json_decode((string) file_get_contents($path), true, 512, JSON_THROW_ON_ERROR);
    $out = walk($data, $translateFn);

    // Persist after every file so translations already fetched survive an abort on a later file.
    $saveCache();

    try {
        $json = json_encode(
            $out,
            JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR,
        );
    } catch (JsonException $e) {
        // Leave the source file untouched instead of overwriting it with an empty payload.
        fwrite(STDERR, "Skipped {$name}, encoding failed: {$e->getMessage()}\n");

        continue;
    }

    if (file_put_contents($path, $json."\n") === false) {
        fwrite(STDERR, "Failed to write {$name}\n");

        continue;
    }

    echo "Updated {$name}\n";
}

// Final save also covers the case where no JSON file was found.
$saveCache();

echo 'Done. Cache: '.$cacheFile."\n";
|