From eb857222936a0989a1f12ecc9ac64f62dd92ff20 Mon Sep 17 00:00:00 2001
From: Nicola Benaglia <nico.benaz@gmail.com>
Date: Tue, 20 May 2025 08:24:25 +0200
Subject: [PATCH] Add python script

---
 .gitignore              |  5 ++-
 scripts/i18n-checker.py | 89 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 scripts/i18n-checker.py

diff --git a/.gitignore b/.gitignore
index 979a042..756e09b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,4 +24,7 @@ dist-ssr
 *.sln
 *.sw?
 release-builds/
-.env
\ No newline at end of file
+.env
+
+# reports from scripts
+scripts/i18n_report*
\ No newline at end of file
diff --git a/scripts/i18n-checker.py b/scripts/i18n-checker.py
new file mode 100644
index 0000000..604d222
--- /dev/null
+++ b/scripts/i18n-checker.py
@@ -0,0 +1,89 @@
+import os
+import re
+import json
+import csv
+import argparse
+
+# Customize as needed
+I18N_FUNCTIONS = ['t', 'i18next.t']
+FILE_EXTENSIONS = ['.tsx']
+EXCLUDED_DIRS = ['node_modules', 'build', 'dist']
+
+# Regex patterns
+STRING_LITERAL_REGEX = re.compile(r'(?<!t\()\s*["\']([A-Z][^"\']{2,})["\']')
+JSX_TEXT_REGEX = re.compile(r'>\s*([A-Z][a-z].*?)\s*<')
+
+def is_excluded(path):
+    return any(excluded in path for excluded in EXCLUDED_DIRS)
+
+def find_untranslated_strings(file_path):
+    issues = []
+    with open(file_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+        # Match suspicious string literals
+        for match in STRING_LITERAL_REGEX.finditer(content):
+            string = match.group(1)
+            if not any(fn + '(' in content[match.start()-10:match.start()] for fn in I18N_FUNCTIONS):
+                issues.append({
+                    'file': file_path,
+                    'position': match.start(),
+                    'type': 'StringLiteral',
+                    'text': string.strip()
+                })
+
+        # Match JSX text nodes
+        for match in JSX_TEXT_REGEX.finditer(content):
+            text = match.group(1)
+            if not text.strip().startswith('{t('):  # naive check
+                issues.append({
+                    'file': file_path,
+                    'position': match.start(),
+                    'type': 'JSXText',
+                    'text': text.strip()
+                })
+
+    return issues
+
+def scan_directory(directory):
+    all_issues = []
+    for root, _, files in os.walk(directory):
+        if is_excluded(root):
+            continue
+        for file in files:
+            if any(file.endswith(ext) for ext in FILE_EXTENSIONS):
+                file_path = os.path.join(root, file)
+                issues = find_untranslated_strings(file_path)
+                all_issues.extend(issues)
+    return all_issues
+
+def save_report(results, output_file):
+    _, ext = os.path.splitext(output_file)
+    if ext.lower() == '.json':
+        with open(output_file, 'w', encoding='utf-8') as f:
+            json.dump(results, f, indent=2)
+    elif ext.lower() == '.csv':
+        with open(output_file, 'w', newline='', encoding='utf-8') as f:
+            writer = csv.DictWriter(f, fieldnames=['file', 'position', 'type', 'text'])
+            writer.writeheader()
+            for row in results:
+                writer.writerow(row)
+    else:
+        raise ValueError("Unsupported output format. Use .json or .csv")
+
+def main():
+    parser = argparse.ArgumentParser(description='Detect untranslated strings in React (.tsx) files.')
+    parser.add_argument('-path', default='../src/', help='Path to the source directory (e.g. ./src)')
+    parser.add_argument('-o', '--output', default='./i18n_report.json', help='Report output file (.json or .csv)')
+
+    args = parser.parse_args()
+    results = scan_directory(args.path)
+
+    if results:
+        save_report(results, args.output)
+        print(f"⚠️  Found {len(results)} potential untranslated strings. Report saved to {args.output}")
+    else:
+        print("✅ No obvious untranslated strings found.")
+
+if __name__ == "__main__":
+    main()