#!/usr/bin/env python3
"""Merge multiple GEDCOM files into one, renumbering INDI and FAM IDs to avoid collisions."""

import glob
import os
import re
import sys

INPUT_DIR = "gedcom_output"
OUTPUT_FILE = "lignees.ged"

# Matches an individual (@I123@) or family (@F123@) cross-reference ID.
_XREF_RE = re.compile(r"@([IF])(\d+)@")


def _close_record(current, records):
    """Finish the record held in *current*.

    HEAD records are not stored; the text of their last ``1 NOTE`` line is
    returned instead (``None`` when there is none). TRLR records are dropped.
    Every other record is appended to *records* as a newline-joined string.
    """
    if not current:
        return None
    tag = current[0].rstrip()
    if tag == "0 HEAD":
        note = None
        for line in current:
            if line.startswith("1 NOTE "):
                note = line[7:]
        return note
    if tag != "0 TRLR":
        records.append("\n".join(current))
    return None


def parse_gedcom(path):
    """Return (header_note, records) where records is a list of raw record strings.

    A record is a level-0 line plus every following line up to the next
    level-0 line. The HEAD and TRLR records are excluded from *records*;
    *header_note* is the text of the HEAD record's ``1 NOTE`` line, or ``""``
    when the header has none.
    """
    # utf-8-sig strips an optional BOM; with plain utf-8 the first line would
    # read "\ufeff0 HEAD" and the header would leak into the data records.
    with open(path, encoding="utf-8-sig") as f:
        content = f.read()

    records = []
    current = []
    header_note = ""
    for line in content.splitlines():
        if line.startswith("0 "):
            note = _close_record(current, records)
            if note is not None:
                header_note = note
            current = [line]
        elif current:
            current.append(line)
        # Lines before the first level-0 line belong to no record: ignored.

    # The last record has no following level-0 line to trigger its flush.
    note = _close_record(current, records)
    if note is not None:
        header_note = note
    return header_note, records


def renumber_records(records, indi_offset, fam_offset):
    """Replace @Innnnn@ and @Fnnnnn@ references with offset-adjusted IDs.

    Every ``@I<n>@`` becomes ``@I<n + indi_offset>@`` and every ``@F<n>@``
    becomes ``@F<n + fam_offset>@``, zero-padded to at least four digits.
    Returns a new list; *records* is not modified.
    """

    def replace_id(m):
        kind = m.group(1)
        num = int(m.group(2))
        if kind == "I":
            return f"@I{num + indi_offset:04d}@"
        return f"@F{num + fam_offset:04d}@"

    return [_XREF_RE.sub(replace_id, record) for record in records]


def _xref_bounds(records, kind):
    """Return (lowest, highest) ID number of *kind* ("I" or "F") used in *records*.

    Both record definitions and pointers to other records are scanned.
    Returns ``None`` when no ID of that kind occurs.
    """
    numbers = [
        int(m.group(2))
        for record in records
        for m in _XREF_RE.finditer(record)
        if m.group(1) == kind
    ]
    if not numbers:
        return None
    return min(numbers), max(numbers)


def _allocate_offset(bounds, next_free):
    """Return (offset, new_next_free) placing a file's IDs at or above *next_free*.

    The offset is never negative, so a file whose IDs are already above
    everything merged so far keeps its numbering.
    """
    if bounds is None:
        return 0, next_free
    lowest, highest = bounds
    offset = max(0, next_free - lowest)
    return offset, highest + offset + 1


def main():
    """Merge every ``*.ged`` file of INPUT_DIR into OUTPUT_FILE.

    Files are processed in sorted path order. Each file's INDI/FAM IDs are
    shifted past the highest ID already used by the preceding files, then all
    records are written after a freshly generated header. Exits with status 1
    when INPUT_DIR contains no ``.ged`` file.
    """
    ged_files = sorted(glob.glob(os.path.join(INPUT_DIR, "*.ged")))
    if not ged_files:
        print(f"No .ged files found in {INPUT_DIR}/", file=sys.stderr)
        sys.exit(1)

    print(f"Found {len(ged_files)} GEDCOM files to merge.")

    all_records = []
    sources = []
    # Lowest ID number not yet used in the merged output, per kind.
    next_indi = 0
    next_fam = 0

    for path in ged_files:
        basename = os.path.basename(path)
        note, records = parse_gedcom(path)
        sources.append(note or basename)

        indi_count = sum(1 for r in records if r.startswith("0 @I"))
        fam_count = sum(1 for r in records if r.startswith("0 @F"))

        # Offsets come from the IDs actually used, not from record counts:
        # with sparse numbering a count-based offset can map two different
        # records onto the same ID.
        indi_offset, next_indi = _allocate_offset(_xref_bounds(records, "I"), next_indi)
        fam_offset, next_fam = _allocate_offset(_xref_bounds(records, "F"), next_fam)

        all_records.extend(renumber_records(records, indi_offset, fam_offset))
        print(
            f" {basename}: {indi_count} INDI, {fam_count} FAM "
            f"(offset I+{indi_offset}, F+{fam_offset})"
        )

    total_indi = sum(1 for r in all_records if r.startswith("0 @I"))
    total_fam = sum(1 for r in all_records if r.startswith("0 @F"))
    print(f"\nTotal: {total_indi} INDI, {total_fam} FAM → {OUTPUT_FILE}")

    with open(OUTPUT_FILE, "w", encoding="utf-8") as out:
        out.write("0 HEAD\n")
        out.write("1 SOUR BaseCGL\n")
        out.write("2 NAME CGL Bases généalogiques du Languedoc – basesgen.sql\n")
        out.write("1 GEDC\n")
        out.write("2 VERS 5.5.1\n")
        out.write("2 FORM LINEAGE-LINKED\n")
        out.write("1 CHAR UTF-8\n")
        # Report the number of files actually merged, not a fixed figure.
        out.write(f"1 NOTE Lignées CGL – fusion de {len(sources)} exports GEDCOM\n")
        for src in sources:
            out.write(f"2 CONT {src}\n")
        out.write("\n")
        for record in all_records:
            out.write(record)
            out.write("\n\n")
        out.write("0 TRLR\n")

    print("Done.")


if __name__ == "__main__":
    main()