160 lines
6.1 KiB
PHP
160 lines
6.1 KiB
PHP
<?php
|
|
|
|
namespace App\Console\Commands;
|
|
|
|
use Illuminate\Console\Command;
|
|
use Illuminate\Support\Facades\DB;
|
|
|
|
/**
 * Phase 1, step 1: identify duplicate contact records.
 *
 * Searches the `contacts` table for rows that presumably represent the same
 * person. Rows with a non-null `merged_into_id` are ignored. Three detection
 * stages are run, in descending order of confidence:
 *
 *   HIGH   - same e-mail address (non-empty)
 *   MEDIUM - same name + first name + birth date
 *   LOW    - same name + first name + ZIP code
 *
 * Usage:
 *   php artisan contacts:find-duplicates
 *   php artisan contacts:find-duplicates --export=duplicates.csv
 *   php artisan contacts:find-duplicates --confidence=HIGH
 */
class ContactsFindDuplicates extends Command
{
    /** Column headers shared by the console table and the CSV export. */
    private const HEADERS = ['Konfidenz', 'Grund', 'IDs (neueste zuerst)', 'Anzahl'];

    protected $signature = 'contacts:find-duplicates
        {--export= : Pfad zur CSV-Ausgabedatei}
        {--confidence= : Nur diese Konfidenz-Stufe ausgeben (HIGH|MEDIUM|LOW)}';

    protected $description = 'Identifiziert doppelte Customer-Datensätze anhand von E-Mail, Name/Geburtsdatum oder Name/PLZ';

    /**
     * Runs the selected detection stages, prints the groups found and
     * optionally writes them to a CSV file.
     *
     * @return int self::SUCCESS, or self::FAILURE when --confidence is not a
     *             known level or the CSV export cannot be written
     */
    public function handle(): int
    {
        $stages = $this->stages();

        // Reject unknown levels up front. Otherwise every stage would be
        // skipped and the command would report "no duplicates" with exit 0.
        $filter = strtoupper((string) $this->option('confidence'));
        if ($filter !== '' && ! array_key_exists($filter, $stages)) {
            $this->error(sprintf(
                'Ungültige Konfidenz-Stufe "%s" — erlaubt: %s',
                $filter,
                implode('|', array_keys($stages))
            ));

            return self::FAILURE;
        }

        $this->info('Suche nach Duplikaten in der contacts-Tabelle...');
        $this->newLine();

        $groups = [];
        foreach ($stages as $level => $stage) {
            if (! $this->shouldCheck($level)) {
                continue;
            }

            $found = $this->findGroups($level, $stage);
            $this->line(sprintf($stage['summary'], count($found)));
            $groups = array_merge($groups, $found);
        }

        $this->newLine();
        $this->info(sprintf('Gesamt: %d Duplikat-Gruppen gefunden', count($groups)));

        if ($groups === []) {
            $this->info('Keine Duplikate — nichts zu tun.');

            return self::SUCCESS;
        }

        // Console table output.
        $this->table(
            self::HEADERS,
            array_map(
                static fn (array $g): array => [$g['confidence'], $g['reason'], $g['ids'], $g['count']],
                $groups
            )
        );

        // CSV export. Compared against '' explicitly so that a path such as
        // "0" is not discarded by a truthiness test.
        $export = (string) $this->option('export');
        if ($export !== '') {
            try {
                $this->exportCsv($groups, $export);
            } catch (\RuntimeException | \ErrorException $e) {
                // A failed fopen() may surface as a false return (turned into a
                // RuntimeException by exportCsv) or, depending on the active
                // error handler, as an ErrorException raised from the warning.
                $this->error('CSV-Export fehlgeschlagen: ' . $e->getMessage());

                return self::FAILURE;
            }

            $this->info("CSV gespeichert: {$export}");
        } else {
            $this->newLine();
            $this->line('Tipp: --export=duplicates.csv für CSV-Export');
            $this->line('Tipp: php artisan contacts:merge-duplicates --dry-run zum Prüfen');
        }

        return self::SUCCESS;
    }

    /**
     * Tells whether the given confidence level should be evaluated.
     *
     * @param string $level One of HIGH, MEDIUM, LOW
     *
     * @return bool true when no --confidence filter was given or when the
     *              filter (compared case-insensitively) equals $level
     */
    private function shouldCheck(string $level): bool
    {
        $filter = strtoupper((string) $this->option('confidence'));

        return $filter === '' || $filter === $level;
    }

    /**
     * Describes the detection stages, keyed by confidence level and listed in
     * descending order of confidence.
     *
     * Each stage holds:
     *  - columns:  columns that must be non-null and are grouped on
     *  - nonEmpty: columns that must additionally differ from ''
     *  - summary:  sprintf() pattern for the per-stage result line
     *  - reason:   callable building the human-readable reason from a row
     *
     * NOTE(review): `name` and `firstname` are only checked for NULL, not for
     * '', unlike `email` and `zip`. Confirm whether rows with empty names
     * should really be grouped.
     *
     * @return array<string, array{columns: string[], nonEmpty: string[], summary: string, reason: callable}>
     */
    private function stages(): array
    {
        return [
            'HIGH' => [
                'columns' => ['email'],
                'nonEmpty' => ['email'],
                'summary' => '<fg=green>HIGH</> (gleiche E-Mail): %d Gruppen',
                'reason' => static fn ($row): string => 'E-Mail: ' . $row->email,
            ],
            'MEDIUM' => [
                'columns' => ['name', 'firstname', 'birthdate'],
                'nonEmpty' => [],
                'summary' => '<fg=yellow>MEDIUM</> (Name+GD): %d Gruppen',
                'reason' => static fn ($row): string => "Name: {$row->firstname} {$row->name}, GD: {$row->birthdate}",
            ],
            'LOW' => [
                'columns' => ['name', 'firstname', 'zip'],
                'nonEmpty' => ['zip'],
                'summary' => '<fg=red>LOW</> (Name+PLZ): %d Gruppen',
                'reason' => static fn ($row): string => "Name: {$row->firstname} {$row->name}, PLZ: {$row->zip}",
            ],
        ];
    }

    /**
     * Runs one detection stage against the `contacts` table.
     *
     * Groups unmerged rows by the stage's columns and keeps the groups that
     * contain more than one row. The IDs of each group are returned as a
     * comma-separated string ordered by `updated_at`, newest first.
     *
     * @param string $level Confidence label stored on every returned group
     * @param array  $stage One entry of stages()
     *
     * @return array<int, array{confidence: string, reason: string, ids: mixed, count: mixed}>
     */
    private function findGroups(string $level, array $stage): array
    {
        $query = DB::table('contacts')
            ->select(array_merge($stage['columns'], [
                DB::raw('COUNT(*) as cnt'),
                DB::raw('GROUP_CONCAT(id ORDER BY updated_at DESC) as ids'),
            ]))
            ->whereNull('merged_into_id')
            ->groupBy(...$stage['columns'])
            ->having('cnt', '>', 1);

        foreach ($stage['columns'] as $column) {
            $query->whereNotNull($column);
        }
        foreach ($stage['nonEmpty'] as $column) {
            $query->where($column, '!=', '');
        }

        $groups = [];
        foreach ($query->get() as $row) {
            $reason = $stage['reason']($row);

            // MySQL caps GROUP_CONCAT output at group_concat_max_len (1024
            // bytes by default) and truncates silently. Best-effort check: if
            // fewer IDs are listed than rows were counted, the list is
            // incomplete. A cut in the middle of the last ID is not detected.
            $listed = substr_count((string) $row->ids, ',') + 1;
            if ($listed < (int) $row->cnt) {
                $this->warn(sprintf(
                    'Warnung: ID-Liste unvollständig (%d von %d IDs) für "%s" — group_concat_max_len erhöhen.',
                    $listed,
                    (int) $row->cnt,
                    $reason
                ));
            }

            $groups[] = [
                'confidence' => $level,
                'reason' => $reason,
                'ids' => $row->ids,
                'count' => $row->cnt,
            ];
        }

        return $groups;
    }

    /**
     * Writes the duplicate groups to a CSV file, header row first.
     *
     * An existing file at $path is overwritten (opened with mode 'w').
     *
     * @param array  $groups Groups with the keys confidence, reason, ids, count
     * @param string $path   Target file path
     *
     * @throws \RuntimeException when the file cannot be opened or a row cannot be written
     */
    private function exportCsv(array $groups, string $path): void
    {
        $handle = fopen($path, 'w');
        if ($handle === false) {
            throw new \RuntimeException("Datei kann nicht zum Schreiben geöffnet werden: {$path}");
        }

        try {
            $rows = [self::HEADERS];
            foreach ($groups as $group) {
                $rows[] = [$group['confidence'], $group['reason'], $group['ids'], $group['count']];
            }

            foreach ($rows as $row) {
                if (fputcsv($handle, $row) === false) {
                    throw new \RuntimeException("Schreiben in Datei fehlgeschlagen: {$path}");
                }
            }
        } finally {
            // Always release the handle, including when a write fails midway.
            fclose($handle);
        }
    }
}
|