mein-sterntours/app/Console/Commands/ContactsFindDuplicates.php
2026-04-17 17:19:11 +02:00

160 lines
6.1 KiB
PHP

<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\DB;
/**
 * Phase 1 — step 1: identify duplicates.
 *
 * Searches the `customer` table for records that probably represent the same
 * person. Records that already have `merged_into_id` set are ignored.
 * Three detection levels (in descending order of confidence):
 *
 *   HIGH   — same e-mail (non-empty)
 *   MEDIUM — same name + first name + birth date
 *   LOW    — same name + first name + postal code
 *
 * Usage:
 *   php artisan contacts:find-duplicates
 *   php artisan contacts:find-duplicates --export=duplicates.csv
 *   php artisan contacts:find-duplicates --confidence=HIGH
 */
class ContactsFindDuplicates extends Command
{
    /** Values accepted by --confidence (compared case-insensitively). */
    private const CONFIDENCE_LEVELS = ['HIGH', 'MEDIUM', 'LOW'];

    protected $signature = 'contacts:find-duplicates
        {--export= : Pfad zur CSV-Ausgabedatei}
        {--confidence= : Nur diese Konfidenz-Stufe ausgeben (HIGH|MEDIUM|LOW)}';

    protected $description = 'Identifiziert doppelte Customer-Datensätze anhand von E-Mail, Name/Geburtsdatum oder Name/PLZ';

    /**
     * Runs the selected detection levels, prints the groups as a table and
     * optionally exports them as CSV.
     *
     * @return int self::SUCCESS, or self::FAILURE for an unknown --confidence
     *             value or a failed CSV export
     */
    public function handle(): int
    {
        // Reject unknown levels up front; otherwise every check is skipped and
        // the command would report "no duplicates" although nothing was searched.
        $filter = $this->confidenceFilter();
        if ($filter !== '' && !in_array($filter, self::CONFIDENCE_LEVELS, true)) {
            $this->error(sprintf(
                'Ungültige Konfidenz-Stufe "%s" — erlaubt: %s',
                $filter,
                implode('|', self::CONFIDENCE_LEVELS)
            ));

            return self::FAILURE;
        }

        $this->info('Suche nach Duplikaten in der customer-Tabelle...');
        $this->newLine();

        $groups = collect();
        $truncated = 0;

        foreach ($this->detectionLevels() as $level => $spec) {
            if (!$this->shouldCheck($level)) {
                continue;
            }

            $rows = $this->findDuplicateRows($spec['columns'], $spec['nonEmpty']);

            foreach ($rows as $row) {
                // GROUP_CONCAT output is silently cut off at the server's
                // group_concat_max_len; fewer IDs than COUNT(*) reveals that.
                if (substr_count((string) $row->ids, ',') + 1 < (int) $row->cnt) {
                    $truncated++;
                }

                $groups->push([
                    'confidence' => $level,
                    'reason' => $spec['reason']($row),
                    'ids' => $row->ids,
                    'count' => $row->cnt,
                ]);
            }

            $this->line(sprintf($spec['summary'], $rows->count()));
        }

        $this->newLine();
        $this->info(sprintf('Gesamt: %d Duplikat-Gruppen gefunden', $groups->count()));

        if ($groups->isEmpty()) {
            $this->info('Keine Duplikate — nichts zu tun.');

            return self::SUCCESS;
        }

        // Table output
        $this->table(
            ['Konfidenz', 'Grund', 'IDs (neueste zuerst)', 'Anzahl'],
            $groups->map(fn ($g) => [$g['confidence'], $g['reason'], $g['ids'], $g['count']])->all()
        );

        if ($truncated > 0) {
            $this->warn(sprintf(
                'Achtung: Bei %d Gruppe(n) ist die ID-Liste unvollständig (group_concat_max_len erreicht).',
                $truncated
            ));
        }

        // CSV export
        if ($export = $this->option('export')) {
            try {
                $this->exportCsv($groups->all(), $export);
            } catch (\RuntimeException $e) {
                $this->error($e->getMessage());

                return self::FAILURE;
            }

            $this->info("CSV gespeichert: {$export}");
        } else {
            $this->newLine();
            $this->line('Tipp: --export=duplicates.csv für CSV-Export');
            $this->line('Tipp: php artisan contacts:merge-duplicates --dry-run zum Prüfen');
        }

        return self::SUCCESS;
    }

    /**
     * Describes the detection levels in the order they are run and reported.
     *
     * Per level: `columns` are the grouping columns (all must be non-NULL),
     * `nonEmpty` the subset that must also differ from the empty string,
     * `summary` the sprintf template for the per-level count line and `reason`
     * a callback that renders the "Grund" cell for one result row.
     *
     * `birthdate` is intentionally not part of `nonEmpty`: comparing a date
     * column with '' can be rejected by the database.
     *
     * @return array<string, array{columns: string[], nonEmpty: string[], summary: string, reason: callable}>
     */
    private function detectionLevels(): array
    {
        return [
            'HIGH' => [
                'columns' => ['email'],
                'nonEmpty' => ['email'],
                'summary' => '<fg=green>HIGH</> (gleiche E-Mail): %d Gruppen',
                'reason' => static fn (object $row): string => 'E-Mail: ' . $row->email,
            ],
            'MEDIUM' => [
                'columns' => ['name', 'firstname', 'birthdate'],
                'nonEmpty' => ['name', 'firstname'],
                'summary' => '<fg=yellow>MEDIUM</> (Name+GD): %d Gruppen',
                'reason' => static fn (object $row): string => "Name: {$row->firstname} {$row->name}, GD: {$row->birthdate}",
            ],
            'LOW' => [
                'columns' => ['name', 'firstname', 'zip'],
                'nonEmpty' => ['name', 'firstname', 'zip'],
                'summary' => '<fg=red>LOW</> (Name+PLZ): %d Gruppen',
                'reason' => static fn (object $row): string => "Name: {$row->firstname} {$row->name}, PLZ: {$row->zip}",
            ],
        ];
    }

    /**
     * Groups unmerged customers by the given columns and returns every group
     * with more than one member.
     *
     * Each row carries the grouping columns, `cnt` (group size) and `ids`
     * (comma-separated customer IDs, most recently updated first).
     *
     * @param string[] $columns  grouping columns; rows with NULL in any of them are skipped
     * @param string[] $nonEmpty columns whose empty-string values are skipped as well
     */
    private function findDuplicateRows(array $columns, array $nonEmpty): \Illuminate\Support\Collection
    {
        $query = DB::table('customer')
            ->select(array_merge($columns, [
                DB::raw('COUNT(*) as cnt'),
                DB::raw('GROUP_CONCAT(id ORDER BY updated_at DESC) as ids'),
            ]))
            ->whereNull('merged_into_id');

        foreach ($columns as $column) {
            $query->whereNotNull($column);
        }

        // Blank values are placeholders, not a shared identity; without this
        // filter all records with an empty name would collapse into one group.
        foreach ($nonEmpty as $column) {
            $query->where($column, '!=', '');
        }

        return $query
            ->groupBy(...$columns)
            ->having('cnt', '>', 1)
            ->get();
    }

    /**
     * Returns the --confidence option trimmed and upper-cased ('' when not given).
     */
    private function confidenceFilter(): string
    {
        return strtoupper(trim((string) $this->option('confidence')));
    }

    /**
     * Tells whether the given level should run: always when no --confidence
     * filter is set, otherwise only when it matches the filter.
     */
    private function shouldCheck(string $level): bool
    {
        $filter = $this->confidenceFilter();

        return $filter === '' || $filter === $level;
    }

    /**
     * Writes the duplicate groups to a CSV file (header row plus one row per group).
     *
     * @param array<int, array{confidence: string, reason: string, ids: mixed, count: mixed}> $groups
     * @param string $path target file; an existing file is overwritten
     *
     * @throws \RuntimeException when the file cannot be opened or written
     */
    private function exportCsv(array $groups, string $path): void
    {
        $handle = false;
        $cause = null;

        try {
            $handle = fopen($path, 'w');
        } catch (\ErrorException $e) {
            // NOTE(review): presumably the framework's error handler promotes the
            // fopen() warning to an ErrorException — handled here as a plain failure.
            $cause = $e;
        }

        if ($handle === false) {
            throw new \RuntimeException("CSV-Datei konnte nicht geöffnet werden: {$path}", 0, $cause);
        }

        try {
            $rows = [['Konfidenz', 'Grund', 'IDs (neueste zuerst)', 'Anzahl']];
            foreach ($groups as $group) {
                $rows[] = [$group['confidence'], $group['reason'], $group['ids'], $group['count']];
            }

            foreach ($rows as $row) {
                if (fputcsv($handle, $row) === false) {
                    throw new \RuntimeException("CSV-Datei konnte nicht geschrieben werden: {$path}");
                }
            }
        } finally {
            // Release the handle even when a write fails.
            fclose($handle);
        }
    }
}