diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..a393fb7
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,184 @@
+import os
+import logging
+import warnings
+from tqdm import tqdm
+from openpyxl import Workbook, load_workbook
+import msoffcrypto
+from io import BytesIO
+from copy import copy, deepcopy
+import zipfile
+import re
+
+warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl.reader.workbook')
+
+# Regular expressions whose matches are deleted from the XML parts of a Word
+# package by remove_all_protection_tags().
+# NOTE(review): all five entries are identical and appear truncated -- the
+# element names that presumably opened and closed each pattern seem to have
+# been lost. As written, "]*(?:/>|>.*?)" matches every ">" or "/>" (together
+# with any "]" directly before it), so substituting it away strips the tag
+# terminators from the XML. Restore the intended patterns before relying on
+# this list.
+PROTECTION_PATTERNS = [
+ r"]*(?:/>|>.*?)",
+ r"]*(?:/>|>.*?)",
+ r"]*(?:/>|>.*?)",
+ r"]*(?:/>|>.*?)",
+ r"]*(?:/>|>.*?)",
+]
+
+def setup_logging():
+ logging.basicConfig(
+ level=logging.DEBUG,
+ format='%(asctime)s [%(levelname)s] %(message)s',
+ handlers=[
+ logging.StreamHandler(),
+ ]
+ )
+
+def load_workbook_with_possible_passwords(filepath, passwords, keep_vba=False, data_only=False):
+ try:
+ wb = load_workbook(filename=filepath, keep_vba=keep_vba, data_only=data_only)
+ logging.info(f"Successfully loaded workbook: {filepath}")
+ return wb
+ except Exception:
+ logging.warning(f"Failed to load workbook normally, trying with passwords for: {filepath}")
+
+ for pwd in passwords:
+ try:
+ decrypted = BytesIO()
+ with open(filepath, 'rb') as f:
+ office_file = msoffcrypto.OfficeFile(f)
+ office_file.load_key(password=pwd.strip())
+ office_file.decrypt(decrypted)
+
+ decrypted.seek(0)
+ wb = load_workbook(filename=decrypted, keep_vba=keep_vba, data_only=data_only)
+ logging.info(f"Successfully decrypted '{filepath}' with password: '{pwd.strip()}'")
+ return wb
+ except Exception:
+ logging.debug(f"Password '{pwd.strip()}' did not work for '{filepath}'")
+ continue
+
+ raise ValueError(f"None of the provided passwords worked for '{filepath}'")
+
+def copy_excel_file(source_path, destination_path, passwords):
+ logging.info(f"Processing Excel file: {source_path}")
+ is_xlsm = source_path.lower().endswith('.xlsm')
+ source_wb = load_workbook_with_possible_passwords(
+ filepath=source_path,
+ passwords=passwords,
+ keep_vba=is_xlsm,
+ data_only=False
+ )
+
+ dest_wb = Workbook()
+ if len(dest_wb.sheetnames) == 1 and dest_wb.active.title == 'Sheet':
+ dest_wb.remove(dest_wb.active)
+
+ for sheet_name in source_wb.sheetnames:
+ logging.debug(f"Copying sheet: {sheet_name}")
+ source_sheet = source_wb[sheet_name]
+ dest_sheet = dest_wb.create_sheet(title=sheet_name)
+
+ for row in source_sheet.iter_rows():
+ for cell in row:
+ dest_cell = dest_sheet.cell(row=cell.row, column=cell.column)
+ dest_cell.value = cell.value
+ if cell.has_style:
+ dest_cell.font = copy(cell.font)
+ dest_cell.border = copy(cell.border)
+ dest_cell.fill = copy(cell.fill)
+ dest_cell.number_format = cell.number_format
+ dest_cell.protection = copy(cell.protection)
+ dest_cell.alignment = copy(cell.alignment)
+
+ os.makedirs(os.path.dirname(destination_path), exist_ok=True)
+ logging.debug(f"Destination directory ensured: {os.path.dirname(destination_path)}")
+
+ dest_wb.save(destination_path)
+ logging.info(f"Saved copied file to: {destination_path}")
+ dest_wb.close()
+ source_wb.close()
+
+def remove_all_protection_tags(docx_path, output_path):
+ logging.info(f"Processing Word document: {docx_path}")
+ with zipfile.ZipFile(docx_path, 'r') as zip_in:
+ file_list = zip_in.namelist()
+
+ with zipfile.ZipFile(output_path, 'w', compression=zipfile.ZIP_DEFLATED) as zip_out:
+ for item in tqdm(file_list, desc="Processing XML files"):
+ data = zip_in.read(item)
+
+ if item.lower().endswith('.xml'):
+ text = data.decode('utf-8', errors='ignore')
+ for pattern in PROTECTION_PATTERNS:
+ text = re.sub(pattern, "", text, flags=re.DOTALL)
+ data = text.encode('utf-8')
+
+ zip_out.writestr(item, data)
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
+ logging.debug(f"Output directory ensured: {os.path.dirname(output_path)}")
+ logging.info(f"Saved cleaned file to: {output_path}")
+
+def main():
+ setup_logging()
+ print("\nChoose the file type to process:")
+ print("1. Excel files")
+ print("2. Word documents")
+ choice = input("Enter your choice (1 or 2): ").strip()
+
+ if choice == '1':
+ source_dir = input("Enter the source folder with Excel files: ").strip()
+ dest_dir = input("Enter the destination folder for copied files: ").strip()
+ os.makedirs(dest_dir, exist_ok=True)
+
+ password_option = input("Choose password option (file/single/none): ").strip().lower()
+ passwords = []
+
+ if password_option == 'file':
+ password_file = input("Enter the path to the password file: ").strip()
+ with open(password_file, 'r', encoding='utf-8') as pf:
+ passwords = [line.strip() for line in pf if line.strip()]
+ elif password_option == 'single':
+ single_password = input("Enter the password: ").strip()
+ passwords = [single_password]
+
+ files = [
+ os.path.join(root, file)
+ for root, _, files in os.walk(source_dir)
+ for file in files
+ if file.lower().endswith(('.xlsx', '.xlsm'))
+ ]
+
+ for source_path in tqdm(files, desc="Copying Excel Files"):
+ relative_path = os.path.relpath(os.path.dirname(source_path), source_dir)
+ dest_path = os.path.join(dest_dir, relative_path, os.path.basename(source_path))
+ os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+ logging.debug(f"Ensured destination path: {dest_path}")
+
+ try:
+ copy_excel_file(source_path, dest_path, passwords)
+ except Exception as e:
+ logging.error(f"Failed to copy {source_path}: {e}")
+
+ elif choice == '2':
+ source_dir = input("Enter the source folder with Word files: ").strip()
+ dest_dir = input("Enter the destination folder for cleaned files: ").strip()
+ os.makedirs(dest_dir, exist_ok=True)
+
+ files = [
+ os.path.join(root, file)
+ for root, _, files in os.walk(source_dir)
+ for file in files
+ if file.lower().endswith(('.docx', '.docm'))
+ ]
+
+ for source_path in tqdm(files, desc="Removing Word Protections"):
+ relative_path = os.path.relpath(os.path.dirname(source_path), source_dir)
+ dest_path = os.path.join(dest_dir, relative_path, os.path.basename(source_path))
+ os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+ logging.debug(f"Ensured destination path: {dest_path}")
+
+ try:
+ remove_all_protection_tags(source_path, dest_path)
+ except Exception as e:
+ logging.error(f"Failed to clean {source_path}: {e}")
+
+ else:
+ logging.error("Invalid choice. Please restart the script and choose a valid option.")
+
+# Start the interactive workflow only when this file is run as a script.
+if __name__ == "__main__":
+ main()