405 lines
16 KiB
Diff
405 lines
16 KiB
Diff
diff --git a/app/Console/Commands/ContactsFindDuplicates.php b/app/Console/Commands/ContactsFindDuplicates.php
|
|
new file mode 100644
|
|
index 0000000..cef982d
|
|
--- /dev/null
|
|
+++ b/app/Console/Commands/ContactsFindDuplicates.php
|
|
@@ -0,0 +1,160 @@
|
|
+<?php
|
|
+
|
|
+namespace App\Console\Commands;
|
|
+
|
|
+use Illuminate\Console\Command;
|
|
+use Illuminate\Support\Facades\DB;
|
|
+
|
|
/**
 * Phase 1 — step 1: identify duplicates.
 *
 * Looks for customer rows that presumably represent the same person.
 * Three detection levels (in descending order of confidence):
 *
 *   HIGH   — same e-mail (non-empty)
 *   MEDIUM — same name + first name + birthdate
 *   LOW    — same name + first name + ZIP code
 *
 * Rows that already carry a merged_into_id are ignored.
 *
 * Usage:
 *   php artisan contacts:find-duplicates
 *   php artisan contacts:find-duplicates --export=duplicates.csv
 *   php artisan contacts:find-duplicates --confidence=HIGH
 */
class ContactsFindDuplicates extends Command
{
    /** Values accepted by --confidence. */
    private const LEVELS = ['HIGH', 'MEDIUM', 'LOW'];

    /**
     * Session value for group_concat_max_len. MySQL truncates GROUP_CONCAT()
     * results at this many bytes (1024 by default), which would cut off the
     * id list of larger groups.
     */
    private const GROUP_CONCAT_MAX_LEN = 1048576;

    protected $signature = 'contacts:find-duplicates
                            {--export= : Pfad zur CSV-Ausgabedatei}
                            {--confidence= : Nur diese Konfidenz-Stufe ausgeben (HIGH|MEDIUM|LOW)}';

    protected $description = 'Identifiziert doppelte Customer-Datensätze anhand von E-Mail, Name/Geburtsdatum oder Name/PLZ';

    /**
     * Runs the duplicate search, prints the groups as a table and optionally
     * writes them to a CSV file.
     *
     * @return int self::SUCCESS, or self::FAILURE for an unknown --confidence
     *             value or a failed CSV export
     */
    public function handle(): int
    {
        // Reject unknown levels up front; otherwise every check would be
        // skipped and the command would claim that there are no duplicates.
        $filter = strtoupper((string) $this->option('confidence'));
        if ($filter !== '' && !in_array($filter, self::LEVELS, true)) {
            $this->error(sprintf(
                'Ungültige Konfidenz-Stufe "%s" — erlaubt: %s',
                $filter,
                implode(', ', self::LEVELS)
            ));

            return self::FAILURE;
        }

        $this->info('Suche nach Duplikaten in der customer-Tabelle...');
        $this->newLine();

        $this->raiseGroupConcatLimit();

        $groups = collect();

        foreach ($this->levelDefinitions() as $level => $definition) {
            if (!$this->shouldCheck($level)) {
                continue;
            }

            $rows = $this->findGroups($definition['text'], $definition['other']);

            foreach ($rows as $row) {
                $groups->push([
                    'confidence' => $level,
                    'reason'     => $definition['reason']($row),
                    'ids'        => $row->ids,
                    'count'      => $row->cnt,
                ]);
            }

            $this->line(sprintf($definition['summary'], $rows->count()));
        }

        $this->newLine();
        $this->info(sprintf('Gesamt: %d Duplikat-Gruppen gefunden', $groups->count()));

        if ($groups->isEmpty()) {
            $this->info('Keine Duplikate — nichts zu tun.');

            return self::SUCCESS;
        }

        // Table output
        $this->table(
            ['Konfidenz', 'Grund', 'IDs (neueste zuerst)', 'Anzahl'],
            $groups->map(fn ($g) => [$g['confidence'], $g['reason'], $g['ids'], $g['count']])->all()
        );

        // CSV export
        if ($export = $this->option('export')) {
            try {
                $this->exportCsv($groups->all(), $export);
            } catch (\RuntimeException $e) {
                $this->error($e->getMessage());

                return self::FAILURE;
            }

            $this->info("CSV gespeichert: {$export}");
        } else {
            $this->newLine();
            $this->line('Tipp: --export=duplicates.csv für CSV-Export');
            $this->line('Tipp: php artisan contacts:merge-duplicates --dry-run zum Prüfen');
        }

        return self::SUCCESS;
    }

    /**
     * Describes each confidence level: the grouping columns, the summary line
     * format and how the human-readable reason is built from a result row.
     *
     * 'text' columns must be non-NULL and non-empty; 'other' columns only
     * non-NULL (birthdate is not compared against '' because it is not
     * treated as a text column here).
     *
     * @return array<string, array{text: list<string>, other: list<string>, summary: string, reason: callable}>
     */
    private function levelDefinitions(): array
    {
        return [
            'HIGH' => [
                'text'    => ['email'],
                'other'   => [],
                'summary' => '<fg=green>HIGH</> (gleiche E-Mail): %d Gruppen',
                'reason'  => static fn (object $row): string => 'E-Mail: ' . $row->email,
            ],
            'MEDIUM' => [
                'text'    => ['name', 'firstname'],
                'other'   => ['birthdate'],
                'summary' => '<fg=yellow>MEDIUM</> (Name+GD): %d Gruppen',
                'reason'  => static fn (object $row): string => "Name: {$row->firstname} {$row->name}, GD: {$row->birthdate}",
            ],
            'LOW' => [
                'text'    => ['name', 'firstname', 'zip'],
                'other'   => [],
                'summary' => '<fg=red>LOW</> (Name+PLZ): %d Gruppen',
                'reason'  => static fn (object $row): string => "Name: {$row->firstname} {$row->name}, PLZ: {$row->zip}",
            ],
        ];
    }

    /**
     * Returns one row per duplicate group: the grouping columns, `cnt`
     * (group size) and `ids` (comma-separated ids, newest first).
     *
     * @param list<string> $textColumns  grouping columns that must be non-NULL and non-empty
     * @param list<string> $otherColumns grouping columns that must be non-NULL
     */
    private function findGroups(array $textColumns, array $otherColumns): \Illuminate\Support\Collection
    {
        $columns = array_merge($textColumns, $otherColumns);

        $query = DB::table('customer')
            ->select($columns)
            ->selectRaw('COUNT(*) as cnt')
            // Same ordering (including the id tie-breaker) as
            // contacts:merge-duplicates, so that the first id shown here is
            // the record that command would pick as master.
            ->selectRaw('GROUP_CONCAT(id ORDER BY updated_at DESC, id DESC) as ids')
            ->whereNull('merged_into_id');

        foreach ($columns as $column) {
            $query->whereNotNull($column);
        }

        // Empty strings are excluded as well: rows with a blank name must not
        // be reported as the same person.
        foreach ($textColumns as $column) {
            $query->where($column, '!=', '');
        }

        return $query
            ->groupBy(...$columns)
            ->having('cnt', '>', 1)
            ->get();
    }

    /**
     * Raises group_concat_max_len for the current session on MySQL/MariaDB
     * connections; other drivers are left untouched.
     */
    private function raiseGroupConcatLimit(): void
    {
        if (in_array(DB::connection()->getDriverName(), ['mysql', 'mariadb'], true)) {
            DB::statement('SET SESSION group_concat_max_len = ' . self::GROUP_CONCAT_MAX_LEN);
        }
    }

    /**
     * Tells whether the given level passes the --confidence filter
     * (no filter means every level is checked).
     */
    private function shouldCheck(string $level): bool
    {
        $filter = strtoupper((string) $this->option('confidence'));

        return $filter === '' || $filter === $level;
    }

    /**
     * Writes the duplicate groups to a CSV file (header row first).
     *
     * @param array<int, array{confidence: string, reason: string, ids: string, count: int|string}> $groups
     *
     * @throws \RuntimeException when the file cannot be opened or written
     */
    private function exportCsv(array $groups, string $path): void
    {
        try {
            $handle = fopen($path, 'w');
        } catch (\ErrorException $e) {
            // Covers setups where PHP warnings are converted to exceptions.
            throw new \RuntimeException("CSV-Export fehlgeschlagen: {$path} kann nicht geöffnet werden.", 0, $e);
        }

        if ($handle === false) {
            throw new \RuntimeException("CSV-Export fehlgeschlagen: {$path} kann nicht geöffnet werden.");
        }

        $rows = [['Konfidenz', 'Grund', 'IDs (neueste zuerst)', 'Anzahl']];
        foreach ($groups as $group) {
            $rows[] = [$group['confidence'], $group['reason'], $group['ids'], $group['count']];
        }

        try {
            foreach ($rows as $row) {
                if (fputcsv($handle, $row) === false) {
                    throw new \RuntimeException("CSV-Export fehlgeschlagen: Schreiben nach {$path} nicht möglich.");
                }
            }
        } finally {
            // Close the handle even when a write fails.
            fclose($handle);
        }
    }
}
|
|
diff --git a/app/Console/Commands/ContactsMergeDuplicates.php b/app/Console/Commands/ContactsMergeDuplicates.php
|
|
new file mode 100644
|
|
index 0000000..e51ca46
|
|
--- /dev/null
|
|
+++ b/app/Console/Commands/ContactsMergeDuplicates.php
|
|
@@ -0,0 +1,233 @@
|
|
+<?php
|
|
+
|
|
+namespace App\Console\Commands;
|
|
+
|
|
+use Illuminate\Console\Command;
|
|
+use Illuminate\Support\Facades\DB;
|
|
+
|
|
/**
 * Phase 1 — step 2: merge duplicates.
 *
 * Strategy: the newest record of a group (highest updated_at, then highest
 * id) becomes the master. Every other record of the group gets
 * merged_into_id = master id and is excluded from later duplicate searches.
 *
 * All customer_id references in lead, booking, customer_mails and lead_mails
 * are re-pointed to the master.
 *
 * Usage:
 *   php artisan contacts:merge-duplicates --dry-run          # preview, no changes
 *   php artisan contacts:merge-duplicates --confidence=HIGH  # only safe duplicates
 *   php artisan contacts:merge-duplicates                    # execute
 */
class ContactsMergeDuplicates extends Command
{
    /** Values accepted by --confidence. */
    private const LEVELS = ['HIGH', 'MEDIUM', 'LOW'];

    /**
     * Session value for group_concat_max_len. MySQL truncates GROUP_CONCAT()
     * results at this many bytes (1024 by default); a truncated list can end
     * in a cut-off id, i.e. the id of a different customer.
     */
    private const GROUP_CONCAT_MAX_LEN = 1048576;

    protected $signature = 'contacts:merge-duplicates
                            {--dry-run : Zeigt was passieren würde, ohne Daten zu ändern}
                            {--confidence= : Nur diese Konfidenz-Stufe verarbeiten (HIGH|MEDIUM|LOW)}
                            {--force : Überspringt Sicherheitsabfrage}';

    protected $description = 'Führt doppelte Customer-Datensätze zusammen (neuester wird Master)';

    private bool $dryRun = false;
    private int $mergedCount = 0;
    private int $updatedLeads = 0;
    private int $updatedBookings = 0;

    /**
     * Ids handled as duplicates during this run, keyed by id. Used to skip
     * them in later levels, which matters in dry-run mode where the database
     * is not updated between levels.
     *
     * @var array<int, true>
     */
    private array $mergedIds = [];

    /**
     * Entry point: validates options, asks for confirmation (unless --dry-run
     * or --force) and processes all levels inside a single transaction.
     *
     * @return int self::SUCCESS, or self::FAILURE for an unknown --confidence value
     */
    public function handle(): int
    {
        // Reject unknown levels up front; otherwise every level would be
        // skipped silently and the command would report zero merges.
        $filter = strtoupper((string) $this->option('confidence'));
        if ($filter !== '' && !in_array($filter, self::LEVELS, true)) {
            $this->error(sprintf(
                'Ungültige Konfidenz-Stufe "%s" — erlaubt: %s',
                $filter,
                implode(', ', self::LEVELS)
            ));

            return self::FAILURE;
        }

        $this->dryRun = (bool) $this->option('dry-run');

        if ($this->dryRun) {
            $this->warn('DRY-RUN Modus — keine Daten werden verändert');
        } else {
            $this->warn('ACHTUNG: Diese Operation verändert Produktionsdaten.');
            if (!$this->option('force') && !$this->confirm('Fortfahren?')) {
                $this->info('Abgebrochen.');

                return self::SUCCESS;
            }
        }

        $this->newLine();

        DB::transaction(function () {
            $this->raiseGroupConcatLimit();

            // Each finder runs only after the previous level has been merged,
            // so already merged rows are no longer returned.
            $this->processLevel('HIGH', fn () => $this->findByEmail());
            $this->processLevel('MEDIUM', fn () => $this->findByNameBirthdate());
            $this->processLevel('LOW', fn () => $this->findByNameZip());
        });

        $this->newLine();
        $this->info(sprintf(
            '%s %d Duplikate zusammengeführt | %d leads aktualisiert | %d bookings aktualisiert',
            $this->dryRun ? '[DRY-RUN]' : '',
            $this->mergedCount,
            $this->updatedLeads,
            $this->updatedBookings
        ));

        if ($this->dryRun) {
            $this->newLine();
            $this->line('Zum Ausführen: php artisan contacts:merge-duplicates');
        }

        return self::SUCCESS;
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Finding duplicate groups
    // ─────────────────────────────────────────────────────────────────────────

    /**
     * Groups of customers sharing the same non-empty e-mail.
     *
     * @return list<list<string>> id lists, newest record first
     */
    private function findByEmail(): array
    {
        return $this->findGroups(['email'], []);
    }

    /**
     * Groups of customers sharing name, first name and birthdate.
     *
     * @return list<list<string>> id lists, newest record first
     */
    private function findByNameBirthdate(): array
    {
        return $this->findGroups(['name', 'firstname'], ['birthdate']);
    }

    /**
     * Groups of customers sharing name, first name and ZIP code.
     *
     * @return list<list<string>> id lists, newest record first
     */
    private function findByNameZip(): array
    {
        return $this->findGroups(['name', 'firstname', 'zip'], []);
    }

    /**
     * Finds groups of not-yet-merged customers that agree on all given columns.
     *
     * @param list<string> $textColumns  grouping columns that must be non-NULL and non-empty
     * @param list<string> $otherColumns grouping columns that must be non-NULL
     *
     * @return list<list<string>> one id list per group, ordered by updated_at DESC, id DESC
     */
    private function findGroups(array $textColumns, array $otherColumns): array
    {
        $columns = array_merge($textColumns, $otherColumns);

        $query = DB::table('customer')
            ->select(DB::raw('GROUP_CONCAT(id ORDER BY updated_at DESC, id DESC) as ids'))
            ->whereNull('merged_into_id');

        foreach ($columns as $column) {
            $query->whereNotNull($column);
        }

        // Empty strings are excluded as well: rows with a blank name must
        // never be merged into one "person".
        foreach ($textColumns as $column) {
            $query->where($column, '!=', '');
        }

        return $query
            ->groupBy(...$columns)
            ->having(DB::raw('COUNT(*)'), '>', 1)
            ->pluck('ids')
            ->map(fn ($ids) => explode(',', $ids))
            ->all();
    }

    /**
     * Raises group_concat_max_len for the current session on MySQL/MariaDB
     * connections; other drivers are left untouched.
     */
    private function raiseGroupConcatLimit(): void
    {
        if (in_array(DB::connection()->getDriverName(), ['mysql', 'mariadb'], true)) {
            DB::statement('SET SESSION group_concat_max_len = ' . self::GROUP_CONCAT_MAX_LEN);
        }
    }

    // ─────────────────────────────────────────────────────────────────────────
    // Processing
    // ─────────────────────────────────────────────────────────────────────────

    /**
     * Runs one confidence level: fetches its groups via $finder and merges
     * every group into its first (newest) record.
     *
     * @param string   $level  HIGH, MEDIUM or LOW
     * @param callable $finder returns a list of id lists, master candidate first
     */
    private function processLevel(string $level, callable $finder): void
    {
        $filter = strtoupper((string) $this->option('confidence'));
        if ($filter !== '' && $filter !== $level) {
            return;
        }

        // Drop ids that were already handled as duplicates in an earlier
        // level. In a real run the finder no longer returns them; in dry-run
        // mode this reproduces the same result without touching the database.
        $groups = [];
        foreach ($finder() as $ids) {
            $remaining = array_values(array_filter(
                array_map('intval', $ids),
                fn (int $id): bool => !isset($this->mergedIds[$id])
            ));

            if (count($remaining) > 1) {
                $groups[] = $remaining;
            }
        }

        if (empty($groups)) {
            $this->line("[{$level}] Keine Duplikate.");

            return;
        }

        $this->line(sprintf('[%s] %d Gruppe(n) gefunden', $level, count($groups)));

        foreach ($groups as $ids) {
            $masterId = $ids[0]; // first = newest
            $duplicateIds = array_slice($ids, 1);

            $this->line(sprintf(
                ' Master: #%d ← Duplikate: %s',
                $masterId,
                implode(', ', array_map(fn ($id) => '#' . $id, $duplicateIds))
            ));

            foreach ($duplicateIds as $dupeId) {
                $this->mergeInto($masterId, $dupeId);
            }
        }
    }

    /**
     * Merges one duplicate into the master: re-points all referencing rows
     * and marks the duplicate as merged. In dry-run mode only the counts are
     * reported and nothing is written.
     */
    private function mergeInto(int $masterId, int $dupeId): void
    {
        $this->updatedLeads += $this->reassign('lead', $masterId, $dupeId);
        $this->updatedBookings += $this->reassign('booking', $masterId, $dupeId);
        $this->reassign('customer_mails', $masterId, $dupeId);
        $this->reassign('lead_mails', $masterId, $dupeId);

        if (!$this->dryRun) {
            // Records merged into this duplicate in an earlier run must follow
            // it to the new master, otherwise merged_into_id would form a
            // chain that ends at a record which is itself merged.
            DB::table('customer')
                ->where('merged_into_id', $dupeId)
                ->update(['merged_into_id' => $masterId]);

            // Mark the duplicate as merged
            DB::table('customer')
                ->where('id', $dupeId)
                ->update([
                    'merged_into_id' => $masterId,
                    'merged_at'      => now(),
                ]);
        }

        $this->mergedIds[$dupeId] = true;
        $this->mergedCount++;
    }

    /**
     * Re-points `customer_id` in $table from the duplicate to the master.
     *
     * @return int number of rows that reference the duplicate (counted before
     *             the update, so the value is the same in dry-run mode)
     */
    private function reassign(string $table, int $masterId, int $dupeId): int
    {
        $count = DB::table($table)->where('customer_id', $dupeId)->count();

        if ($count === 0) {
            return 0;
        }

        $this->line(" {$table}.customer_id: {$count} Zeile(n) → #{$masterId}");

        if (!$this->dryRun) {
            DB::table($table)
                ->where('customer_id', $dupeId)
                ->update(['customer_id' => $masterId]);
        }

        return $count;
    }
}
|