mivita/app/Console/Commands/BackfillFrenchDatabaseTranslations.php

551 lines
19 KiB
PHP

<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Http\Client\RequestException;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Schema;
use Illuminate\Support\Str;
/*
php artisan translation:backfill-french-db --dry-run
php artisan translation:backfill-french-db
php artisan translation:backfill-french-db --models=products,ingredients --limit=10 --dry-run
php artisan translation:backfill-french-db --overwrite
*/
class BackfillFrenchDatabaseTranslations extends Command
{
/**
 * The name and signature of the console command.
 *
 * Options declared below:
 *  - --driver:    "openai" (default) or "copy-source"; handle() rejects anything else.
 *  - --models:    comma-separated subset of the keys returned by translationModels().
 *  - --source:    source language code, default "de".
 *  - --target:    target language code, default "fr".
 *  - --limit:     maximum number of source rows processed per model.
 *  - --overwrite: also replace target translations that are already filled.
 *  - --test-api:  translate the fixed sample texts only; the database is not touched.
 *  - --dry-run:   report the planned writes without storing anything.
 *
 * @var string
 */
protected $signature = 'translation:backfill-french-db
{--driver=openai : Translation driver: openai or copy-source}
{--models= : Comma-separated models: products,ingredients,categories,shippings,user_levels,dashboard_news}
{--source=de : Source language}
{--target=fr : Target language}
{--limit= : Limit source rows per model}
{--overwrite : Replace existing non-empty target translations}
{--test-api : Translate sample texts and print them without touching the database}
{--dry-run : Show planned writes without changing the database}';
/**
 * The console command description.
 *
 * The text mentions French because "fr" is the default of the --target option;
 * the target language itself is configurable through that option.
 *
 * @var string
 */
protected $description = 'Backfills French database translations for the admin translation module';
/**
 * Execute the console command.
 *
 * Validates the options, optionally runs the API smoke test (--test-api),
 * then backfills every selected model and prints the accumulated counters.
 *
 * @return int self::SUCCESS on completion; self::FAILURE when an option is
 *             invalid, no valid model is selected, or an OpenAI request fails.
 */
public function handle(): int
{
    $driver = (string) $this->option('driver');
    $sourceLanguage = Str::lower((string) $this->option('source'));
    $targetLanguage = Str::lower((string) $this->option('target'));
    $dryRun = (bool) $this->option('dry-run');
    $overwrite = (bool) $this->option('overwrite');

    if (! in_array($driver, ['openai', 'copy-source'], true)) {
        $this->error('Unsupported driver. Use openai or copy-source.');

        return self::FAILURE;
    }

    if ($driver === 'openai' && blank(config('services.openai.api_key'))) {
        $this->error('OPENAI_API_KEY is missing. Set it on the live server or use --driver=copy-source for a dry run.');

        return self::FAILURE;
    }

    if ((bool) $this->option('test-api')) {
        return $this->runApiTest($driver, $sourceLanguage, $targetLanguage);
    }

    // A plain (int) cast would turn "abc" or "-5" into "no limit" and process
    // every row, so the value is validated instead. An omitted or empty option
    // and 0 keep their previous meaning of "no limit".
    $limit = null;
    $rawLimit = $this->option('limit');
    if (! blank($rawLimit)) {
        $limit = filter_var($rawLimit, FILTER_VALIDATE_INT);

        if ($limit === false || $limit < 0) {
            $this->error('Invalid --limit value. Use a non-negative integer.');

            return self::FAILURE;
        }
    }

    $models = $this->selectedModels();
    if ($models === []) {
        $this->error('No valid models selected.');

        return self::FAILURE;
    }

    $this->ensureTargetLanguage($targetLanguage, $dryRun);

    $summary = [
        'created' => 0,
        'updated' => 0,
        'skipped' => 0,
        'empty' => 0,
    ];

    foreach ($models as $modelName => $spec) {
        $this->info("Processing {$modelName}...");

        try {
            $modelSummary = $this->backfillModel($modelName, $spec, $driver, $sourceLanguage, $targetLanguage, $overwrite, $dryRun, $limit);
        } catch (RequestException $exception) {
            // Abort on the first failed API request; rows stored so far stay stored.
            $this->reportOpenAiException($exception);

            return self::FAILURE;
        }

        foreach ($summary as $key => $value) {
            $summary[$key] = $value + $modelSummary[$key];
        }
    }

    $this->newLine();
    $this->info("Created: {$summary['created']}");
    $this->info("Updated: {$summary['updated']}");
    $this->info("Skipped existing: {$summary['skipped']}");
    $this->info("Skipped empty source: {$summary['empty']}");

    return self::SUCCESS;
}
/**
 * Translate the fixed sample texts and print source and result side by side.
 *
 * Nothing is written to the database. The output labels are derived from
 * the actual language codes, so they stay correct when --source/--target
 * differ from the de/fr defaults.
 *
 * @return int self::SUCCESS, or self::FAILURE when the driver is not
 *             "openai" or an API request fails.
 */
private function runApiTest(string $driver, string $sourceLanguage, string $targetLanguage): int
{
    if ($driver !== 'openai') {
        $this->error('The API test requires --driver=openai.');

        return self::FAILURE;
    }

    $this->info('OpenAI translation API test');
    $this->line('Model: '.config('services.openai.model'));
    $this->line("Language: {$sourceLanguage} -> {$targetLanguage}");
    $this->newLine();

    $sourceLabel = Str::upper($sourceLanguage);
    $targetLabel = Str::upper($targetLanguage);

    foreach ($this->apiTestSamples() as $index => $sourceValue) {
        try {
            $translatedValue = $this->translateWithOpenAI($sourceValue, $sourceLanguage, $targetLanguage);
        } catch (RequestException $exception) {
            $this->reportOpenAiException($exception);

            return self::FAILURE;
        }

        $this->line('['.($index + 1).'] '.$sourceLabel.': '.$sourceValue);
        $this->line('['.($index + 1).'] '.$targetLabel.': '.$translatedValue);
        $this->newLine();
    }

    $this->info('API test completed.');

    return self::SUCCESS;
}
/**
 * German sample sentences used by the --test-api run.
 *
 * The third sample contains a ":amount" placeholder and the terms MIVITA and
 * PayPal, which match the patterns in protectTerms().
 *
 * @return array<int, string>
 */
private function apiTestSamples(): array
{
    $samples = [];

    $samples[] = 'Aloe Vera Gel für die tägliche Pflege der Haut.';
    $samples[] = 'Der Berater kann seinem Kunden ein passendes Abo empfehlen.';
    $samples[] = 'MIVITA Produktbeschreibung mit :amount ml Inhalt und PayPal Zahlung.';

    return $samples;
}
/**
 * Print a failed OpenAI request in a readable form.
 *
 * Shows the HTTP status, the "error.code" and "error.type" fields of the
 * JSON body when they are non-empty, and the error message (falling back to
 * the exception message). For HTTP 429 or the code "insufficient_quota" an
 * additional billing/quota hint is printed.
 */
private function reportOpenAiException(RequestException $exception): void
{
    $failedResponse = $exception->response;
    $httpStatus = $failedResponse->status();

    $details = [
        'Code' => (string) $failedResponse->json('error.code'),
        'Type' => (string) $failedResponse->json('error.type'),
    ];

    $apiMessage = $failedResponse->json('error.message');
    $text = (string) ($apiMessage ?: $exception->getMessage());

    $this->error("OpenAI API request failed with HTTP {$httpStatus}.");

    foreach ($details as $label => $detail) {
        if ($detail === '') {
            continue;
        }

        $this->line("{$label}: {$detail}");
    }

    $this->line("Message: {$text}");

    $looksLikeQuotaProblem = $httpStatus === 429 || $details['Code'] === 'insufficient_quota';
    if ($looksLikeQuotaProblem) {
        $this->warn('Bitte prüfe im OpenAI Dashboard das Billing, das Projekt-Budget, Usage-Limits und ob der OPENAI_API_KEY zum richtigen Projekt gehört.');
    }
}
/**
 * Backfill the target-language translations of one model.
 *
 * Iterates the source rows ordered by id (skipping rows with a non-null
 * deleted_at when that column exists), and for every configured field either
 * skips it (empty source, or already translated and --overwrite not set) or
 * translates it and stores the result.
 *
 * NOTE(review): in dry-run mode translateValue() is still called, so the
 * openai driver still sends requests; only the database write is skipped.
 *
 * @param array<string, mixed> $spec Model entry from translationModels().
 * @return array{created: int, updated: int, skipped: int, empty: int}
 */
private function backfillModel(
    string $modelName,
    array $spec,
    string $driver,
    string $sourceLanguage,
    string $targetLanguage,
    bool $overwrite,
    bool $dryRun,
    ?int $limit
): array {
    $sourceTable = $spec['source_table'];
    $fields = $spec['fields'];
    $counters = array_fill_keys(['created', 'updated', 'skipped', 'empty'], 0);

    $rows = DB::table($sourceTable)
        ->select(array_merge(['id'], $fields))
        ->orderBy('id');

    if (Schema::hasColumn($sourceTable, 'deleted_at')) {
        $rows->whereNull('deleted_at');
    }

    if ($limit !== null && $limit > 0) {
        $rows->limit($limit);
    }

    $rowTotal = $this->countRows($spec, $limit);
    $fieldTotal = count($fields);
    $this->line("Status {$modelName}: {$rowTotal} Datensätze, {$fieldTotal} Felder.");

    $position = 0;
    foreach ($rows->cursor() as $row) {
        $position++;
        $this->line("Datensatz {$position}/{$rowTotal}: {$modelName}#{$row->id}");

        foreach ($fields as $field) {
            $label = "{$modelName}#{$row->id}.{$field}";
            $source = trim((string) ($row->{$field} ?? ''));

            // Nothing to translate.
            if ($source === '') {
                $counters['empty']++;
                $this->line(" - {$label}: Quelle leer, übersprungen.");

                continue;
            }

            $existing = $this->existingTranslationValue($spec, (int) $row->id, $field, $targetLanguage);

            // Keep filled translations unless --overwrite was requested.
            if (filled($existing) && ! $overwrite) {
                $counters['skipped']++;
                $this->line(" - {$label}: vorhandene Übersetzung, übersprungen.");

                continue;
            }

            // A null lookup result is counted as "created", anything else as "updated".
            $isNew = $existing === null;

            $this->line(" - {$label}: ".$this->translationStatusText($field, $spec, $driver).'...');
            $translated = $this->translateValue($source, $field, $spec, $driver, $sourceLanguage, $targetLanguage);

            if (! $dryRun) {
                $this->storeTranslationValue($spec, (int) $row->id, $field, $targetLanguage, $translated);
                $verb = $isNew ? 'erstellt' : 'aktualisiert';
                $this->line(" - {$label}: gespeichert ({$verb}).");
            } else {
                $verb = $isNew ? 'create' : 'update';
                $this->line(" - {$label}: [dry-run] würde {$verb}.");
            }

            $counters[$isNew ? 'created' : 'updated']++;
        }
    }

    return $counters;
}
/**
 * Look up the currently stored translation of one field.
 *
 * For "json" storage the value is read from the decoded trans_<field> column
 * of the source row and trimmed; otherwise the "value" column of the matching
 * translation-table row is returned as stored.
 *
 * @param array<string, mixed> $spec Model entry from translationModels().
 * @return string|null Null when no value is stored for the target language.
 */
private function existingTranslationValue(array $spec, int $sourceId, string $field, string $targetLanguage): ?string
{
    $storage = $spec['storage'] ?? 'table';

    if ($storage !== 'json') {
        $conditions = [
            'language' => $targetLanguage,
            $spec['foreign_key'] => $sourceId,
            'key' => $field,
        ];

        return DB::table($spec['translation_table'])
            ->where($conditions)
            ->value('value');
    }

    $stored = $this->jsonTranslations($spec, $sourceId, $field)[$targetLanguage] ?? null;
    if ($stored === null) {
        return null;
    }

    return trim((string) $stored);
}
/**
 * Persist one translated field value.
 *
 * "json" storage merges the value into the trans_<field> JSON column of the
 * source row (and bumps updated_at when that column exists). Table storage
 * updates the matching translation row or inserts a new one.
 *
 * Differences to a plain updateOrInsert():
 *  - created_at is only written when the row is inserted, so updating an
 *    existing translation no longer resets its creation timestamp;
 *  - JSON encoding failures throw instead of writing `false` into the column,
 *    which would destroy the translations of all other languages.
 *
 * @param array<string, mixed> $spec Model entry from translationModels().
 *
 * @throws \JsonException When the merged translations cannot be JSON-encoded.
 */
private function storeTranslationValue(array $spec, int $sourceId, string $field, string $targetLanguage, string $translatedValue): void
{
    if (($spec['storage'] ?? 'table') === 'json') {
        $translationColumn = 'trans_'.$field;
        $translations = $this->jsonTranslations($spec, $sourceId, $field);
        $translations[$targetLanguage] = $translatedValue;

        $data = [
            $translationColumn => json_encode($translations, JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR),
        ];

        if (Schema::hasColumn($spec['source_table'], 'updated_at')) {
            $data['updated_at'] = now();
        }

        DB::table($spec['source_table'])
            ->where('id', $sourceId)
            ->update($data);

        return;
    }

    $identity = [
        'language' => $targetLanguage,
        $spec['foreign_key'] => $sourceId,
        'key' => $field,
    ];
    $timestamp = now();

    $rowExists = DB::table($spec['translation_table'])
        ->where($identity)
        ->exists();

    if ($rowExists) {
        // Same shape as updateOrInsert()'s update branch, minus created_at.
        DB::table($spec['translation_table'])
            ->where($identity)
            ->limit(1)
            ->update([
                'value' => $translatedValue,
                'updated_at' => $timestamp,
            ]);

        return;
    }

    DB::table($spec['translation_table'])->insert(array_merge($identity, [
        'value' => $translatedValue,
        'created_at' => $timestamp,
        'updated_at' => $timestamp,
    ]));
}
/**
 * Read the trans_<field> column of one source row as a language => text map.
 *
 * An array value is returned as is; anything else is cast to string and
 * JSON-decoded. A value that does not decode to an array yields [].
 *
 * @param array<string, mixed> $spec Model entry from translationModels().
 * @return array<string, string>
 */
private function jsonTranslations(array $spec, int $sourceId, string $field): array
{
    $stored = DB::table($spec['source_table'])
        ->where('id', $sourceId)
        ->value('trans_'.$field);

    if (! is_array($stored)) {
        $stored = json_decode((string) $stored, true);
    }

    if (is_array($stored)) {
        return $stored;
    }

    return [];
}
/**
 * Number of source rows a backfill run will visit for one model.
 *
 * Counts the rows of the source table (ignoring rows with a non-null
 * deleted_at when that column exists) and caps the result at $limit when a
 * positive limit is given.
 *
 * @param array<string, mixed> $spec Model entry from translationModels().
 */
private function countRows(array $spec, ?int $limit): int
{
    $table = $spec['source_table'];
    $builder = DB::table($table);

    if (Schema::hasColumn($table, 'deleted_at')) {
        $builder->whereNull('deleted_at');
    }

    $total = $builder->count();
    $hasLimit = $limit !== null && $limit > 0;

    return $hasLimit ? min($total, $limit) : $total;
}
/**
 * Progress text describing how a field is going to be filled.
 *
 * The source is copied when the driver is "copy-source" or the field is
 * listed in the model's "copy_fields"; otherwise it is translated via OpenAI.
 *
 * @param array<string, mixed> $spec Model entry from translationModels().
 */
private function translationStatusText(string $field, array $spec, string $driver): string
{
    $copyFields = $spec['copy_fields'] ?? [];
    $copiesSource = $driver === 'copy-source' || in_array($field, $copyFields, true);

    return $copiesSource ? 'übernehme Quelle' : 'übersetze via OpenAI';
}
/**
 * Produce the target-language value for one field.
 *
 * Returns the source text unchanged for the "copy-source" driver and for
 * fields listed in the model's "copy_fields"; every other field is sent to
 * translateWithOpenAI().
 *
 * @param array<string, mixed> $spec Model entry from translationModels().
 */
private function translateValue(string $sourceValue, string $field, array $spec, string $driver, string $sourceLanguage, string $targetLanguage): string
{
    $copyFields = $spec['copy_fields'] ?? [];
    $needsTranslation = $driver !== 'copy-source' && ! in_array($field, $copyFields, true);

    if ($needsTranslation) {
        return $this->translateWithOpenAI($sourceValue, $sourceLanguage, $targetLanguage);
    }

    return $sourceValue;
}
/**
 * Translate one text through the configured OpenAI chat endpoint.
 *
 * Placeholders and protected terms are swapped for tokens before the request
 * (protectTerms) and put back into the answer afterwards (restoreTerms).
 *
 * A response without usable text in choices.0.message.content is rejected
 * with an exception; silently returning '' would store an empty translation
 * and, with --overwrite, replace an existing good one.
 *
 * @throws RequestException   When the HTTP request fails.
 * @throws \RuntimeException  When the response contains no translation text.
 */
private function translateWithOpenAI(string $sourceValue, string $sourceLanguage, string $targetLanguage): string
{
    [$preparedValue, $protectedValues] = $this->protectTerms($sourceValue);

    $payload = [
        'model' => config('services.openai.model'),
        'temperature' => 0.1,
        'messages' => [
            [
                'role' => 'system',
                'content' => implode(' ', [
                    'You translate ecommerce and MLM CRM content for mivita.care.',
                    'Translate from German to French unless another source/target language is requested.',
                    'Return only the translated text, without quotes, notes, markdown, explanations, or alternative variants.',
                    'Preserve HTML tags, URLs, numbers, units, placeholders, and tokens like __MIVITA_TRANSLATION_TOKEN_0__ exactly.',
                    'Keep brand names and protected product terms unchanged.',
                    'Use consistent terminology: Berater = conseiller, Kunde = client, Abo = abonnement.',
                ]),
            ],
            [
                'role' => 'user',
                'content' => "Source language: {$sourceLanguage}\nTarget language: {$targetLanguage}\nText:\n{$preparedValue}",
            ],
        ],
    ];

    $content = Http::withToken((string) config('services.openai.api_key'))
        ->acceptJson()
        ->timeout((int) config('services.openai.timeout', 60))
        ->retry(2, 1000, function ($exception): bool {
            // HTTP 429 is never retried; it is reported to the user instead.
            if ($exception instanceof RequestException && $exception->response->status() === 429) {
                return false;
            }

            return true;
        })
        ->post((string) config('services.openai.url'), $payload)
        ->throw()
        ->json('choices.0.message.content');

    $translatedValue = is_string($content) ? trim($content) : '';

    if ($translatedValue === '') {
        throw new \RuntimeException('OpenAI returned no translation text (choices.0.message.content is missing or empty).');
    }

    return $this->restoreTerms($translatedValue, $protectedValues);
}
/**
 * Replace placeholders and protected terms with opaque tokens.
 *
 * Three patterns are applied one after another: ":name" style placeholders,
 * "{{ ... }}" expressions, and a fixed list of brand/product terms. Every
 * match is swapped for "__MIVITA_TRANSLATION_TOKEN_<n>__" and recorded so
 * restoreTerms() can put the original text back.
 *
 * preg_replace_callback() returns null when PCRE fails (for instance on
 * input that is not valid UTF-8, because the patterns use the /u modifier).
 * That case raises an exception; otherwise a null text would be passed on
 * and an empty prompt would be sent for translation.
 *
 * @return array{0: string, 1: array<string, string>} Tokenised text and token => original map.
 *
 * @throws \RuntimeException When a pattern cannot be applied to the value.
 */
private function protectTerms(string $value): array
{
    $protectedValues = [];
    $patterns = [
        '/(:[A-Za-z_][A-Za-z0-9_-]*)/u',
        '/(\{\{\s*[^}]+\s*\}\})/u',
        '/\b(MIVITA|PAYONE|PayPal|DHL|INCI|CBD|GRÜNE SEELE|Aloe Vera)\b/u',
    ];

    foreach ($patterns as $pattern) {
        $replaced = preg_replace_callback($pattern, function (array $matches) use (&$protectedValues): string {
            $token = '__MIVITA_TRANSLATION_TOKEN_'.count($protectedValues).'__';
            $protectedValues[$token] = $matches[1];

            return $token;
        }, $value);

        if ($replaced === null) {
            throw new \RuntimeException(
                'Could not protect terms before translation (PCRE error code '.preg_last_error().'); the source text is probably not valid UTF-8.'
            );
        }

        $value = $replaced;
    }

    return [$value, $protectedValues];
}
/**
 * Put the protected originals back in place of their tokens.
 *
 * Tokens are restored newest-first. A later token can wrap an earlier one
 * (a ":name" placeholder inside "{{ ... }}" is tokenised first and then
 * swallowed by the "{{ ... }}" token). str_replace() works through its
 * search list in order, so restoring oldest-first would leave the inner
 * token in the final text.
 *
 * @param array<string, string> $protectedValues Token => original text, in creation order.
 */
private function restoreTerms(string $value, array $protectedValues): string
{
    $newestFirst = array_reverse($protectedValues, true);

    return str_replace(array_keys($newestFirst), array_values($newestFirst), $value);
}
/**
 * Make sure the target language has a row in trans_languages.
 *
 * In dry-run mode only the intent is printed. Otherwise a row is inserted
 * when none exists for the language code. An existing row is left untouched;
 * updateOrInsert() would overwrite its name and created_at on every run.
 */
private function ensureTargetLanguage(string $targetLanguage, bool $dryRun): void
{
    if ($dryRun) {
        $this->line("[dry-run] ensure trans_languages.{$targetLanguage}");

        return;
    }

    $alreadyRegistered = DB::table('trans_languages')
        ->where('language', $targetLanguage)
        ->exists();

    if ($alreadyRegistered) {
        return;
    }

    $timestamp = now();

    DB::table('trans_languages')->insert([
        'language' => $targetLanguage,
        // German display name for French; other languages fall back to the upper-cased code.
        'name' => $targetLanguage === 'fr' ? 'Französisch' : Str::upper($targetLanguage),
        'created_at' => $timestamp,
        'updated_at' => $timestamp,
    ]);
}
/**
 * Resolve the --models option to the model specs to process.
 *
 * Without the option every model from translationModels() is returned.
 * Otherwise the comma-separated names are trimmed and looked up in order.
 * Unknown names are skipped with a warning, so a typo does not silently
 * reduce the set of processed models.
 *
 * @return array<string, array<string, mixed>> Specs keyed by model name; may be empty.
 */
private function selectedModels(): array
{
    $availableModels = $this->translationModels();
    $selectedModels = $this->option('models');

    if (blank($selectedModels)) {
        return $availableModels;
    }

    $resolved = [];

    foreach (explode(',', (string) $selectedModels) as $rawName) {
        $name = trim($rawName);

        if ($name === '') {
            continue;
        }

        if (! isset($availableModels[$name])) {
            $this->warn("Unknown model \"{$name}\" ignored. Available models: ".implode(', ', array_keys($availableModels)).'.');

            continue;
        }

        $resolved[$name] = $availableModels[$name];
    }

    return $resolved;
}
/**
 * Registry of the models this command can backfill.
 *
 * Table-backed models name the source table, the translation table, the
 * foreign key column of the translation table, and the translatable fields.
 * "copy_fields" lists fields that are copied instead of translated.
 * dashboard_news uses "json" storage and therefore has no translation table.
 *
 * @return array<string, array<string, mixed>>
 */
private function translationModels(): array
{
    // Builds the spec of a model whose translations live in a separate table.
    $tableBacked = static fn (string $sourceTable, string $translationTable, string $foreignKey, array $fields): array => [
        'source_table' => $sourceTable,
        'translation_table' => $translationTable,
        'foreign_key' => $foreignKey,
        'fields' => $fields,
    ];

    $models = [];

    $models['products'] = $tableBacked(
        'products',
        'trans_products',
        'product_id',
        ['name', 'copy', 'description', 'usage', 'ingredients']
    );

    $models['ingredients'] = $tableBacked(
        'ingredients',
        'trans_ingredients',
        'ingredient_id',
        ['name', 'inci', 'effect']
    ) + ['copy_fields' => ['inci']];

    $models['categories'] = $tableBacked('categories', 'trans_categories', 'categorie_id', ['name', 'headline']);
    $models['shippings'] = $tableBacked('shippings', 'trans_shippings', 'shipping_id', ['name']);
    $models['user_levels'] = $tableBacked('user_levels', 'trans_user_levels', 'user_level_id', ['name']);

    $models['dashboard_news'] = [
        'storage' => 'json',
        'source_table' => 'dashboard_news',
        'fields' => ['title', 'teaser', 'content'],
    ];

    return $models;
}
}