Files
Calibre-Web-Automated/scripts/fix_po_duplicates.py
T

225 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# Calibre-Web Automated fork of Calibre-Web
# Copyright (C) 2018-2025 Calibre-Web contributors
# Copyright (C) 2024-2025 Calibre-Web Automated contributors
# SPDX-License-Identifier: GPL-3.0-or-later
# See CONTRIBUTORS for full list of authors.
"""
Comprehensive script to fix all types of duplicate msgid entries in .po files.
Handles:
1. Regular duplicate msgid entries
2. Duplicates between active and obsolete (#~) entries
3. Multiline msgid duplicates (both regular and obsolete)
"""
import sys
import re
class POEntry:
    """A single message read from a .po file, plus where it sits in the file.

    Attributes:
        msgid: Normalized source string of the message.
        msgstr: Normalized translation of the message.
        line_start: Index of the first line of the entry.
        line_end: Index of the last line of the entry.
        is_obsolete: True when the entry is written with the ``#~`` marker.
        is_fuzzy: True when the entry is flagged as fuzzy.
    """

    def __init__(self, msgid, msgstr, line_start, line_end, is_obsolete=False, is_fuzzy=False):
        # Message text.
        self.msgid, self.msgstr = msgid, msgstr
        # Span of the entry in the list of file lines.
        self.line_start, self.line_end = line_start, line_end
        # Status markers.
        self.is_obsolete, self.is_fuzzy = is_obsolete, is_fuzzy
def normalize_string(s):
    """Return the content of a quoted .po string literal.

    Surrounding whitespace is dropped and exactly one pair of enclosing
    double quotes is removed. Removing only one pair matters when the
    content itself starts or ends with an escaped quote (``"say \\"hi\\""``):
    stripping every quote character at the edges would eat the quote that
    belongs to the escape sequence.

    Args:
        s: Raw text following a keyword such as ``msgid`` (or a bare
            continuation line).

    Returns:
        The text between the enclosing quotes. Input that is not properly
        quoted falls back to having stray edge quotes stripped.
    """
    text = s.strip()
    if len(text) >= 2 and text[0] == '"' and text[-1] == '"':
        return text[1:-1]
    # Malformed / unquoted input: be lenient and drop any stray edge quotes.
    return text.strip('"')
# Separator placed between a message context and its msgid to build a unique
# key (the same convention gettext uses inside compiled catalogs).
_CONTEXT_SEPARATOR = "\x04"


def _split_obsolete(raw_line):
    """Return ``(is_obsolete, text)`` for a line, with any ``#~`` marker removed."""
    text = raw_line.strip()
    if text.startswith('#~'):
        return True, text[2:].lstrip()
    return False, text


def _collect_string(lines, index, first_chunk, is_obsolete):
    """Read a possibly multi-line .po string that starts on ``lines[index]``.

    ``first_chunk`` is the quoted text that follows the keyword on the first
    line. Continuation lines (bare quoted strings carrying the same obsolete
    marker as the entry) are appended to it.

    Returns ``(content, next_index)`` where ``next_index`` is the first line
    that does not belong to the string.
    """
    parts = [normalize_string(first_chunk)]
    index += 1
    while index < len(lines):
        obsolete, text = _split_obsolete(lines[index])
        if obsolete != is_obsolete or not text.startswith('"'):
            break
        parts.append(normalize_string(text))
        index += 1
    return ''.join(parts), index


def parse_po_file(filename):
    """Parse a .po file and return all entries, including obsolete ones.

    Handles active and obsolete (``#~``) entries, multi-line strings,
    ``#, fuzzy`` flags, message contexts (``msgctxt``) and plural forms
    (``msgid_plural`` / ``msgstr[n]``).

    Args:
        filename: Path of the .po file, read as UTF-8.

    Returns:
        A tuple ``(entries, lines)``: ``entries`` is a list of ``POEntry``
        in file order and ``lines`` is the list of raw file lines the
        entries' ``line_start`` / ``line_end`` indices refer to (0-based,
        inclusive). For an entry with a context, ``msgid`` holds
        ``context + "\\x04" + msgid`` so that identical msgids in different
        contexts are not mistaken for duplicates, and the span starts on the
        ``msgctxt`` line. For plural entries ``msgstr`` holds the first
        plural form and the span covers every ``msgstr[n]`` line.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    entries = []
    total = len(lines)
    pending_fuzzy = False  # set by a '#, ... fuzzy' flag line, consumed by the next entry
    i = 0
    while i < total:
        is_obsolete, text = _split_obsolete(lines[i])

        # Ordinary comments are skipped, but flag comments must be inspected
        # first, otherwise the fuzzy flag can never be seen.
        if not is_obsolete and text.startswith('#'):
            if text.startswith('#,'):
                flags = [flag.strip() for flag in text[2:].split(',')]
                if 'fuzzy' in flags:
                    pending_fuzzy = True
            i += 1
            continue

        entry_start = i
        context = None
        if text.startswith('msgctxt '):
            context, i = _collect_string(lines, i, text[len('msgctxt '):], is_obsolete)
            if i >= total:
                break
            next_obsolete, text = _split_obsolete(lines[i])
            if next_obsolete != is_obsolete or not text.startswith('msgid '):
                # Stray msgctxt without a msgid: drop it and rescan this line.
                continue

        if not text.startswith('msgid '):
            # Blank line or anything else we do not recognise.
            i += 1
            continue

        msgid_content, i = _collect_string(lines, i, text[len('msgid '):], is_obsolete)

        # Plural source string: part of the entry span, not part of the key.
        if i < total:
            obsolete, text = _split_obsolete(lines[i])
            if obsolete == is_obsolete and text.startswith('msgid_plural '):
                _, i = _collect_string(lines, i, text[len('msgid_plural '):], is_obsolete)

        # One plain 'msgstr', or a run of 'msgstr[n]' plural forms.
        msgstr_content = ""
        translations_seen = 0
        while i < total:
            obsolete, text = _split_obsolete(lines[i])
            is_plain = text.startswith('msgstr ')
            is_indexed = text.startswith('msgstr[')
            if not (is_indexed or (is_plain and translations_seen == 0)):
                break
            # A msgstr whose obsolete marker does not match the msgid is still
            # consumed as part of the entry, but its text is ignored.
            value = text.partition(' ')[2] if obsolete == is_obsolete else ''
            content, i = _collect_string(lines, i, value, is_obsolete)
            if translations_seen == 0:
                msgstr_content = content
            translations_seen += 1
            if is_plain:
                break

        if context is None:
            key = msgid_content
        else:
            key = context + _CONTEXT_SEPARATOR + msgid_content

        entries.append(POEntry(
            msgid=key,
            msgstr=msgstr_content,
            line_start=entry_start,
            line_end=i - 1,
            is_obsolete=is_obsolete,
            is_fuzzy=pending_fuzzy,
        ))
        pending_fuzzy = False

    return entries, lines
def find_duplicates(entries):
    """Find entries whose msgid has already been seen earlier in the file.

    Each later occurrence is paired with the entry that is currently
    expected to survive for that msgid. When an active entry follows an
    obsolete one with the same msgid, the active entry becomes the survivor,
    so a third occurrence is compared against the active entry rather than
    against the obsolete one that is about to be dropped.

    Args:
        entries: Iterable of objects exposing ``msgid`` and ``is_obsolete``.

    Returns:
        A list of dicts with keys ``'msgid'``, ``'original'`` (the surviving
        candidate seen earlier) and ``'duplicate'`` (the later occurrence),
        in file order. Entries with an empty msgid (the header) are ignored.
    """
    survivors = {}
    duplicates = []
    for entry in entries:
        if not entry.msgid or entry.msgid == '""':  # header / empty msgid
            continue
        kept = survivors.get(entry.msgid)
        if kept is None:
            survivors[entry.msgid] = entry
            continue
        duplicates.append({
            'msgid': entry.msgid,
            'original': kept,
            'duplicate': entry,
        })
        # An active entry supersedes an obsolete one; track it as the survivor
        # so later occurrences are not paired with the obsolete entry again.
        if kept.is_obsolete and not entry.is_obsolete:
            survivors[entry.msgid] = entry
    return duplicates
def _attached_comment_start(lines, start):
    """Return the index of the first comment line directly above ``start``.

    Comment lines (``#``, ``#.``, ``#:``, ``#,``, ``#|``) immediately
    preceding an entry belong to it. Obsolete (``#~``) lines are never
    crossed because they are part of another entry.
    """
    while start > 0:
        previous = lines[start - 1].strip()
        if not previous.startswith('#') or previous.startswith('#~'):
            break
        start -= 1
    return start


def fix_po_file(filename):
    """Remove duplicate msgid entries from a .po file in place.

    The untouched content is first saved to ``<filename>.backup``. For every
    duplicate pair the active entry is preferred over an obsolete one;
    otherwise the earlier entry is kept. An entry is removed together with
    the comment lines attached directly above it, so its references and flags
    (e.g. ``#, fuzzy``) do not end up attached to the following entry.

    Args:
        filename: Path of the .po file to fix.

    Returns:
        None. Progress is reported on standard output.
    """
    print(f"Checking {filename} for all types of duplicates...")
    entries, lines = parse_po_file(filename)
    duplicates = find_duplicates(entries)
    if not duplicates:
        print("No duplicates found.")
        return
    print(f"Found {len(duplicates)} duplicate msgid entries:")

    # Create backup before anything is modified.
    backup_filename = filename + '.backup'
    print(f"Creating backup: {backup_filename}")
    with open(backup_filename, 'w', encoding='utf-8') as f:
        f.writelines(lines)

    # msgid -> entry currently chosen to stay. Needed when a msgid occurs more
    # than twice so that an entry already scheduled for removal is never used
    # as the reference again.
    survivors = {}
    # A set guarantees that no range is deleted twice, which would otherwise
    # take unrelated lines with it.
    ranges_to_remove = set()
    for dup in duplicates:
        msgid = dup['msgid']
        duplicate = dup['duplicate']
        original = survivors.get(msgid, dup['original'])
        msgid_preview = msgid[:50] + ('...' if len(msgid) > 50 else '')
        print(f"\nDuplicate msgid: '{msgid_preview}'")
        print(f" Original: line {original.line_start + 1} (obsolete: {original.is_obsolete})")
        print(f" Duplicate: line {duplicate.line_start + 1} (obsolete: {duplicate.is_obsolete})")

        # Prefer to keep active entries over obsolete ones.
        if original.is_obsolete and not duplicate.is_obsolete:
            doomed, kept = original, duplicate
        else:
            doomed, kept = duplicate, original
        survivors[msgid] = kept

        start = _attached_comment_start(lines, doomed.line_start)
        end = doomed.line_end + 1  # exclusive
        ranges_to_remove.add((start, end))
        if doomed is original:
            print(f" -> Removing original obsolete entry at lines {start + 1}-{end}")
        else:
            print(f" -> Removing duplicate entry at lines {start + 1}-{end}")

    # Delete from the bottom up so earlier indices stay valid.
    removed_lines = 0
    for start, end in sorted(ranges_to_remove, reverse=True):
        del lines[start:end]
        removed_lines += end - start

    # Write fixed file.
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(lines)
    print(f"\nFixed! Removed {removed_lines} lines containing duplicates.")
    print(f"File has been updated: {filename}")
def main():
    """Command-line entry point: fix the .po file named by the sole argument.

    Prints usage and exits with status 1 unless exactly one argument is
    given. A file name without the ``.po`` extension only produces a
    warning. Any failure while processing the file is reported and turned
    into exit status 1.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        usage_lines = (
            "Usage: python3 fix_po_duplicates.py <file.po>",
            "\nThis script fixes all types of duplicate msgid entries in .po files:",
            "- Regular duplicate msgid entries",
            "- Duplicates between active and obsolete (#~) entries",
            "- Multiline msgid duplicates",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    (filename,) = arguments
    has_po_extension = filename.endswith('.po')
    if not has_po_extension:
        print("Warning: File doesn't have .po extension")

    try:
        fix_po_file(filename)
    except Exception as error:
        # A missing file gets its own message; everything else is generic.
        if isinstance(error, FileNotFoundError):
            print(f"Error: File '{filename}' not found")
        else:
            print(f"Error processing file: {error}")
        sys.exit(1)
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()