#!/usr/bin/env python3
"""Extract translatable strings (text-domain gdpr-content-blocker) from the plugin PHP.

Usage: script BASE_DIR [OUTPUT_FILE]

Every ``*.php`` file below BASE_DIR is scanned for calls to the translation
helpers listed in ``FUNCS`` whose first argument is a single-quoted string and
whose second argument is the literal ``'gdpr-content-blocker'``. Each distinct
first argument is written once to OUTPUT_FILE (default ``strings.txt``) as a
``MSGID<TAB>text`` line, preceded by a ``# N unique strings`` header.
"""
import glob
import os
import re
import sys

# Function names that introduce a translatable string. The pattern has no
# word-boundary anchor, so any identifier that merely ends in `__` or `_e`
# matches as well.
FUNCS = r"(?:esc_html__|esc_html_e|esc_attr__|esc_attr_e|__|_e)"
# capture first single-quoted arg
PAT = re.compile(FUNCS + r"\s*\(\s*'((?:[^'\\]|\\.)*)'\s*,\s*'gdpr-content-blocker'")


def unescape(s: str) -> str:
    """Return *s* with the escapes ``\\'`` and ``\\\\`` reduced to ``'`` and ``\\``.

    Any other backslash sequence is left untouched.
    """
    return s.replace("\\'", "'").replace('\\\\', '\\')


def extract_strings(base):
    """Return the unique strings matched by ``PAT`` below *base*, in first-seen order.

    Files are visited in sorted path order so that repeated runs over the same
    tree produce the same output regardless of directory listing order.
    """
    found = {}  # insertion-ordered dict doubles as an ordered set
    pattern = os.path.join(base, '**', '*.php')
    for path in sorted(glob.glob(pattern, recursive=True)):
        with open(path, encoding='utf-8') as f:
            txt = f.read()
        for m in PAT.finditer(txt):
            found.setdefault(unescape(m.group(1)), None)
    return list(found)


def write_strings(strings, out):
    """Write *strings* to the file *out*, one ``MSGID<TAB>text`` line per entry."""
    # NOTE(review): a string containing a tab or newline is written verbatim and
    # would break the one-entry-per-line layout - confirm whether consumers care.
    with open(out, 'w', encoding='utf-8') as f:
        f.write(f"# {len(strings)} unique strings\n")
        f.writelines("MSGID\t" + s + "\n" for s in strings)


def main(argv=None):
    """Run the extractor and return a process exit code.

    *argv* is the argument list without the program name; ``sys.argv[1:]`` is
    used when it is ``None``. Returns 2 (after printing a usage line to
    stderr) when the base directory is missing, otherwise 0.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        prog = os.path.basename(sys.argv[0]) if sys.argv and sys.argv[0] else 'extract'
        print(f"usage: {prog} BASE_DIR [OUTPUT_FILE]", file=sys.stderr)
        return 2
    base = args[0]
    out = args[1] if len(args) > 1 else 'strings.txt'
    strings = extract_strings(base)
    write_strings(strings, out)
    print(f"{len(strings)} strings written to {out}")
    return 0


if __name__ == "__main__":
    sys.exit(main())