10-04-2026
This commit is contained in:
parent
4d6b4930b2
commit
4bb89aad8c
836 changed files with 52961 additions and 5950 deletions
116
database/scripts/build-b2in-en-from-de.php
Normal file
116
database/scripts/build-b2in-en-from-de.php
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* Einmaliges Skript: Liest deutschsprachige Sektionen aus database/data/b2in-en/*.json,
|
||||
* übersetzt String-Werte per MyMemory (de→en) und schreibt die Dateien zurück.
|
||||
*
|
||||
* Ausführung: php database/scripts/build-b2in-en-from-de.php
|
||||
*
|
||||
* Hinweis: MyMemory hat ein Kontingent; bei Fehlern bleibt der Originaltext erhalten.
|
||||
*/
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
// Directory whose *.json files are translated in place.
$dir = dirname(__DIR__).'/data/b2in-en';

// Persistent map of source text => translated text, reused across runs so
// that strings translated earlier do not cost API quota again.
$cacheFile = dirname(__DIR__).'/../storage/app/b2in-en-translation-cache.json';

// Load the cache from a previous run, if any.
// - A missing or empty file simply means "no cache yet".
// - Malformed JSON still throws (JSON_THROW_ON_ERROR) so a damaged cache can be
//   repaired by hand instead of being silently overwritten at the end of the run.
$cache = [];
if (is_file($cacheFile)) {
    $rawCache = trim((string) file_get_contents($cacheFile));
    if ($rawCache !== '') {
        $decodedCache = json_decode($rawCache, true, 512, JSON_THROW_ON_ERROR);
        if (! is_array($decodedCache)) {
            // Valid JSON but not an object/array (e.g. `null` or a bare string):
            // fail here with a clear message rather than later inside translate().
            throw new UnexpectedValueException('Translation cache is not a JSON object: '.$cacheFile);
        }

        // Keep only string translations; translate() returns cached values
        // from a function declared ": string" under strict_types.
        $cache = array_filter($decodedCache, 'is_string');
    }
}
|
||||
|
||||
/**
 * Decides whether a string should be left untranslated.
 *
 * A string is skipped when, after trimming, it is empty, consists only of
 * digits/whitespace/number punctuation/currency signs/dashes, is an http(s)
 * URL, is a slash-prefixed path without spaces, or is an e-mail address.
 *
 * @param string $s Candidate string taken from the JSON data.
 *
 * @return bool True when the string must be passed through unchanged.
 */
function shouldSkipString(string $s): bool
{
    $candidate = trim($s);

    if ($candidate === '') {
        return true;
    }

    // Internal paths/routes such as "/kontakt": leading slash and no spaces.
    $looksLikePath = str_starts_with($candidate, '/') && ! str_contains($candidate, ' ');
    if ($looksLikePath) {
        return true;
    }

    $nonTranslatablePatterns = [
        // Numbers, percentages, prices, ranges.
        '/^[\d\s.,:%€$+\-–—]+$/u',
        // Absolute http/https URLs.
        '#^https?://#i',
        // E-mail addresses.
        '/^[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}$/i',
    ];

    foreach ($nonTranslatablePatterns as $pattern) {
        if (preg_match($pattern, $candidate)) {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/**
 * Translates a single German string to English through the MyMemory HTTP API.
 *
 * Strings rejected by shouldSkipString() and strings already present in the
 * cache are returned without a network request. On any failure (transport
 * error, undecodable response, missing text, non-200 responseStatus) the
 * original text is returned unchanged and nothing is cached, so a later run
 * can retry it.
 *
 * @param string               $text  Source text to translate.
 * @param array<array-key, mixed> $cache Source text => translation map, passed by
 *                                    reference; successful translations are added.
 *
 * @return string The translation, or $text itself when skipped or on failure.
 */
function translate(string $text, array &$cache): string
{
    if (shouldSkipString($text)) {
        return $text;
    }

    // is_string(): a hand-edited or damaged cache entry must not be returned
    // from a function declared ": string" (TypeError under strict_types).
    if (isset($cache[$text]) && is_string($cache[$text])) {
        return $cache[$text];
    }

    $body = http_build_query([
        'q' => $text,
        'langpair' => 'de|en',
    ]);

    $ctx = stream_context_create([
        'http' => [
            'method' => 'POST',
            'timeout' => 60,
            'header' => "Content-Type: application/x-www-form-urlencoded\r\nUser-Agent: B2inCmsBuild/1.0\r\n",
            'content' => $body,
        ],
    ]);

    // "@" is deliberate: transport failures are reported through the false
    // return value and handled as "keep the original text".
    $raw = @file_get_contents('https://api.mymemory.translated.net/get', false, $ctx);

    // Throttle after EVERY request, not only successful ones; otherwise a
    // failing API is hit back-to-back for all remaining strings.
    usleep(150000);

    if ($raw === false) {
        return $text;
    }

    $data = json_decode($raw, true);
    if (! is_array($data)) {
        return $text;
    }

    $out = $data['responseData']['translatedText'] ?? null;
    if (! is_string($out) || $out === '') {
        return $text;
    }

    // Accept the status as int 200 or numeric string "200"; anything else is
    // treated as a failure.
    $status = $data['responseStatus'] ?? 200;
    if (! is_numeric($status) || (int) $status !== 200) {
        return $text;
    }

    // NOTE(review): MyMemory reportedly delivers quota notices inside
    // translatedText ("MYMEMORY WARNING: ..."); confirm against the API docs.
    // Defensive guard so such a notice is never cached or written to the data.
    if (str_starts_with($out, 'MYMEMORY WARNING')) {
        return $text;
    }

    $cache[$text] = $out;

    return $out;
}
|
||||
|
||||
/**
 * Recursively applies a callback to every string inside a nested value.
 *
 * Arrays are traversed depth-first with their keys and order preserved;
 * strings are replaced by the callback's result; every other value (int,
 * float, bool, null, ...) is returned untouched.
 *
 * @param mixed    $v  Value to transform (typically decoded JSON).
 * @param callable $fn Callback receiving a string and returning its replacement.
 *
 * @return mixed The transformed copy of $v.
 */
function walk(mixed $v, callable $fn): mixed
{
    if (is_array($v)) {
        // array_map() with a single array keeps the original keys.
        return array_map(
            static fn (mixed $child): mixed => walk($child, $fn),
            $v
        );
    }

    return is_string($v) ? $fn($v) : $v;
}
|
||||
|
||||
// Adapter so walk() can call translate() with the shared cache. The cache is
// captured by reference so translations accumulate across all files.
$translateFn = function (string $s) use (&$cache): string {
    return translate($s, $cache);
};

try {
    foreach (glob($dir.'/*.json') ?: [] as $path) {
        $name = basename($path);

        // NOTE(review): decoding to associative arrays means an empty JSON
        // object "{}" in the input is written back as "[]".
        $data = json_decode((string) file_get_contents($path), true, 512, JSON_THROW_ON_ERROR);
        $out = walk($data, $translateFn);

        // JSON_THROW_ON_ERROR is essential here: without it a failed encode
        // returns false, and false."\n" would replace the file with a lone newline.
        $json = json_encode(
            $out,
            JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR
        );

        if (file_put_contents($path, $json."\n") === false) {
            throw new RuntimeException('Could not write '.$path);
        }

        echo "Updated {$name}\n";
    }
} finally {
    // Persist the cache even when the loop aborts with an exception, so that
    // translations already fetched are not lost.
    $cacheJson = json_encode($cache, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    if ($cacheJson === false || file_put_contents($cacheFile, $cacheJson) === false) {
        // Report instead of throwing: an exception here would mask the one
        // that caused the loop to abort.
        fwrite(STDERR, 'Warning: could not write cache: '.$cacheFile."\n");
    }
}

echo 'Done. Cache: '.$cacheFile."\n";
|
||||
Loading…
Add table
Add a link
Reference in a new issue