Add CSV export of mariage table and merged GEDCOM
- export_mariages_csv.py : extrait la table `mariage` depuis basesgen.sql en 4 fichiers CSV classés par type d'acte (BMS, CM+TABLE CM, TABLE MARIAGES, ETAT CIVIL) ; ajoute 4 colonnes dérivées PERE/MERE_EPOUX/EPOUSE_DECEDE(E) détectées via le suffixe '+' sur les noms de parents ; encodage UTF-8 (corrige la lecture latin-1 initiale qui produisait des artefacts de type "FÃ©lix")
- csv_export/ : 4 fichiers générés (380 892 enregistrements au total)
- merge_gedcom.py : fusionne les GEDCOM individuels en renumérotant les identifiants INDI/FAM pour éviter les collisions
- lignees.ged : fusion des 16 exports (4 299 individus, 1 484 familles)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+113
@@ -0,0 +1,113 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Merge multiple GEDCOM files into one, renumbering INDI and FAM IDs to avoid collisions."""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Directory scanned (non-recursively) for the individual *.ged files to merge.
INPUT_DIR = "gedcom_output"
# Path of the merged GEDCOM file written by main().
OUTPUT_FILE = "lignees.ged"
|
||||
|
||||
def parse_gedcom(path):
    """Split a GEDCOM file into its level-0 records.

    Args:
        path: Path of the GEDCOM file to read. It is decoded as UTF-8 and a
            leading byte-order mark, if present, is discarded.

    Returns:
        A ``(header_note, records)`` tuple. ``header_note`` is the text of the
        last ``1 NOTE`` line of the ``0 HEAD`` record ("" when there is none).
        ``records`` lists every other record in file order, each as its lines
        joined by a newline; the ``0 HEAD`` and ``0 TRLR`` records are left
        out so the caller can write its own.
    """
    # "utf-8-sig" strips an optional BOM. With plain "utf-8" the BOM stays
    # glued to "0 HEAD", the header is not recognised and ends up in records.
    with open(path, encoding="utf-8-sig") as f:
        lines = f.read().splitlines()

    # Group the lines into records: a line starting with "0 " opens a new
    # record, any other line belongs to the record currently being built.
    # Lines found before the first "0 " line form a record of their own.
    groups = []
    for line in lines:
        if line.startswith("0 ") or not groups:
            groups.append([line])
        else:
            groups[-1].append(line)

    header_note = ""
    records = []
    for group in groups:
        # Trailing whitespace must not prevent HEAD/TRLR from being recognised.
        tag = group[0].rstrip()
        if tag == "0 HEAD":
            # Keep the NOTE line for source tracking (the last one wins).
            for entry in group:
                if entry.startswith("1 NOTE "):
                    header_note = entry[7:]
        elif tag != "0 TRLR":
            records.append("\n".join(group))

    return header_note, records
|
||||
|
||||
|
||||
def renumber_records(records, indi_offset, fam_offset):
    """Shift every ``@I<n>@`` and ``@F<n>@`` identifier found in the records.

    Args:
        records: Raw GEDCOM record strings.
        indi_offset: Amount added to the number of each ``@I<n>@`` identifier.
        fam_offset: Amount added to the number of each ``@F<n>@`` identifier.

    Returns:
        A new list with one rewritten string per input record. Identifiers are
        emitted zero-padded to at least four digits; text that does not match
        the identifier pattern is left untouched.
    """
    id_pattern = re.compile(r"@([IF])(\d+)@")

    def shifted(match):
        # Group 1 is the identifier kind, group 2 its numeric part.
        num = int(match.group(2))
        if match.group(1) != "I":
            return f"@F{num + fam_offset:04d}@"
        return f"@I{num + indi_offset:04d}@"

    return [id_pattern.sub(shifted, record) for record in records]
|
||||
|
||||
|
||||
def _collision_free_offset(records, kind, offset, next_free):
    """Raise *offset* just enough that renumbered *kind* IDs stay unique.

    Args:
        records: Raw record strings of the file about to be renumbered.
        kind: "I" for individuals or "F" for families.
        offset: Offset proposed by the running record count.
        next_free: Lowest ID number not yet used in the merged output.

    Returns:
        ``(offset, next_free)``: the offset to apply to this file, and the
        lowest ID number still unused once the file has been renumbered.
    """
    numbers = [int(n) for rec in records for n in re.findall(rf"@{kind}(\d+)@", rec)]
    if not numbers:
        return offset, next_free
    # The smallest ID of this file must land at or above the first free number.
    offset = max(offset, next_free - min(numbers))
    return offset, max(next_free, offset + max(numbers) + 1)


def main():
    """Merge every ``*.ged`` file of INPUT_DIR into OUTPUT_FILE.

    Files are processed in sorted path order. The INDI/FAM identifiers of each
    file are shifted by the number of INDI/FAM records already merged; the
    shift is increased when a previous file had gaps in its numbering, so two
    files can never produce the same identifier. A fresh header listing the
    source of each input file (its header NOTE, or its file name) is written
    before the records, and a single TRLR record after them.

    Exits with status 1 when INPUT_DIR contains no ``*.ged`` file.
    """
    ged_files = sorted(glob.glob(os.path.join(INPUT_DIR, "*.ged")))
    if not ged_files:
        print(f"No .ged files found in {INPUT_DIR}/", file=sys.stderr)
        sys.exit(1)

    print(f"Found {len(ged_files)} GEDCOM files to merge.")

    all_records = []
    sources = []
    indi_offset = 0
    fam_offset = 0
    # Lowest INDI/FAM numbers not yet used in the merged output.
    indi_free = 0
    fam_free = 0

    for path in ged_files:
        basename = os.path.basename(path)
        note, records = parse_gedcom(path)
        sources.append(note or basename)

        indi_count = sum(1 for r in records if r.startswith("0 @I"))
        fam_count = sum(1 for r in records if r.startswith("0 @F"))

        # Counting records is only collision-free when every file numbers its
        # IDs without gaps; bump the offsets when an earlier file did not.
        indi_offset, indi_free = _collision_free_offset(records, "I", indi_offset, indi_free)
        fam_offset, fam_free = _collision_free_offset(records, "F", fam_offset, fam_free)

        all_records.extend(renumber_records(records, indi_offset, fam_offset))

        print(f"  {basename}: {indi_count} INDI, {fam_count} FAM (offset I+{indi_offset}, F+{fam_offset})")
        indi_offset += indi_count
        fam_offset += fam_count

    total_indi = sum(1 for r in all_records if r.startswith("0 @I"))
    total_fam = sum(1 for r in all_records if r.startswith("0 @F"))
    print(f"\nTotal: {total_indi} INDI, {total_fam} FAM → {OUTPUT_FILE}")

    with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        out.write("0 HEAD\n")
        out.write("1 SOUR BaseCGL\n")
        out.write("2 NAME CGL Bases généalogiques du Languedoc – basesgen.sql\n")
        out.write("1 GEDC\n")
        out.write("2 VERS 5.5.1\n")
        out.write("2 FORM LINEAGE-LINKED\n")
        out.write("1 CHAR UTF-8\n")
        # Report the real number of merged files instead of a fixed "16".
        out.write(f"1 NOTE Lignées CGL – fusion de {len(ged_files)} exports GEDCOM\n")
        for src in sources:
            out.write(f"2 CONT {src}\n")
        out.write("\n")
        for record in all_records:
            out.write(record)
            out.write("\n\n")
        out.write("0 TRLR\n")

    print("Done.")
|
||||
|
||||
|
||||
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user