Compare commits
10 Commits
79b2bb0835
...
c6dc3455ac
| Author | SHA1 | Date | |
|---|---|---|---|
| c6dc3455ac | |||
| f11023e424 | |||
| 51a635880a | |||
| 0c8ad4a8aa | |||
| 6a580785a8 | |||
| 9b9322ddec | |||
| ac5306ed30 | |||
| b8698d41b8 | |||
| 43afbf694d | |||
| 6b5c851a00 |
41
.gitignore
vendored
Normal file
41
.gitignore
vendored
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# Aider
|
||||||
|
.aider*
|
||||||
|
|
||||||
|
# Environment
|
||||||
|
.env
|
||||||
|
venv/
|
||||||
|
env/
|
||||||
|
.env/
|
||||||
|
.venv/
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.idea/
|
||||||
|
.vscode/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
Thumbs.db
|
||||||
184
src/main.py
Normal file
184
src/main.py
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import warnings
|
||||||
|
from tqdm import tqdm
|
||||||
|
from openpyxl import Workbook, load_workbook
|
||||||
|
import msoffcrypto
|
||||||
|
from io import BytesIO
|
||||||
|
from copy import copy, deepcopy
|
||||||
|
import zipfile
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Suppress UserWarning messages raised from the openpyxl.reader.workbook module.
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl.reader.workbook')

# Regular expressions for the Word XML elements that
# remove_all_protection_tags() deletes from every ".xml" archive member.
# Each pattern matches either the self-closing form (<w:x ... />) or an
# opening tag followed, non-greedily, by its matching closing tag; the caller
# applies them with re.DOTALL so the element body may span several lines.
PROTECTION_PATTERNS = [
    r"<w:documentProtection[^>]*(?:/>|>.*?</w:documentProtection>)",
    r"<w:writeProtection[^>]*(?:/>|>.*?</w:writeProtection>)",
    r"<w:commentProtection[^>]*(?:/>|>.*?</w:commentProtection>)",
    r"<w:revisionProtection[^>]*(?:/>|>.*?</w:revisionProtection>)",
    r"<w:trackRevisions[^>]*(?:/>|>.*?</w:trackRevisions>)",
]
|
||||||
|
|
||||||
|
def setup_logging():
    """Configure the root logger via ``logging.basicConfig``.

    Requests DEBUG level, a ``time [LEVEL] message`` line format and a single
    ``StreamHandler``.
    """
    console_handler = logging.StreamHandler()
    line_format = '%(asctime)s [%(levelname)s] %(message)s'
    logging.basicConfig(
        level=logging.DEBUG,
        format=line_format,
        handlers=[console_handler],
    )
|
||||||
|
|
||||||
|
def load_workbook_with_possible_passwords(filepath, passwords, keep_vba=False, data_only=False):
    """Load a workbook, falling back to decryption with candidate passwords.

    A plain ``load_workbook`` call is tried first. If it fails for any reason,
    each entry of *passwords* (whitespace-stripped) is used in turn to decrypt
    the file into memory with ``msoffcrypto`` and the decrypted bytes are
    loaded instead.

    Args:
        filepath: Path of the workbook on disk.
        passwords: Iterable of candidate password strings; may be empty.
        keep_vba: Forwarded to ``load_workbook``.
        data_only: Forwarded to ``load_workbook``.

    Returns:
        The workbook object returned by ``load_workbook``.

    Raises:
        ValueError: If the plain load fails and no candidate password yields a
            loadable workbook (including when *passwords* is empty).
    """
    try:
        wb = load_workbook(filename=filepath, keep_vba=keep_vba, data_only=data_only)
    except Exception as exc:
        # Every failure of the plain load is treated as "possibly encrypted";
        # the reason is kept at DEBUG level so it is not lost entirely.
        logging.warning(f"Failed to load workbook normally, trying with passwords for: {filepath}")
        logging.debug(f"Plain load of '{filepath}' failed with {type(exc).__name__}")
    else:
        logging.info(f"Successfully loaded workbook: {filepath}")
        return wb

    for attempt, raw_password in enumerate(passwords, start=1):
        candidate = raw_password.strip()
        try:
            decrypted = BytesIO()
            with open(filepath, 'rb') as f:
                office_file = msoffcrypto.OfficeFile(f)
                office_file.load_key(password=candidate)
                office_file.decrypt(decrypted)

            decrypted.seek(0)
            wb = load_workbook(filename=decrypted, keep_vba=keep_vba, data_only=data_only)
        except Exception as exc:
            # Passwords are secrets: identify the attempt by position only,
            # never by value, so they do not end up in log output.
            logging.debug(
                f"Password #{attempt} did not work for '{filepath}' ({type(exc).__name__})"
            )
            continue

        logging.info(f"Successfully decrypted '{filepath}' with password #{attempt}")
        return wb

    raise ValueError(f"None of the provided passwords worked for '{filepath}'")
|
||||||
|
|
||||||
|
def copy_excel_file(source_path, destination_path, passwords):
    """Copy an Excel workbook's cells into a new workbook file.

    The source is opened with ``load_workbook_with_possible_passwords``. For
    every sheet, each cell's value is copied into a same-named sheet of a new
    ``Workbook``; for styled cells the font, border, fill, number format,
    protection and alignment are copied as well. Nothing else from the source
    workbook is transferred by this function.

    Args:
        source_path: Path of the workbook to read.
        destination_path: Path the copy is saved to; its parent directory is
            created when the path has one.
        passwords: Candidate passwords forwarded to the loader.

    Raises:
        ValueError: Propagated from the loader when the source cannot be opened.
    """
    logging.info(f"Processing Excel file: {source_path}")
    # NOTE(review): keep_vba is requested for .xlsm sources, but the output is
    # built from a fresh Workbook() - confirm whether that flag has any effect
    # on the saved copy.
    is_xlsm = source_path.lower().endswith('.xlsm')
    source_wb = load_workbook_with_possible_passwords(
        filepath=source_path,
        passwords=passwords,
        keep_vba=is_xlsm,
        data_only=False
    )

    dest_wb = Workbook()
    try:
        # Drop the default sheet of the new workbook so that only sheets
        # copied from the source remain.
        if len(dest_wb.sheetnames) == 1 and dest_wb.active.title == 'Sheet':
            dest_wb.remove(dest_wb.active)

        for sheet_name in source_wb.sheetnames:
            logging.debug(f"Copying sheet: {sheet_name}")
            source_sheet = source_wb[sheet_name]
            dest_sheet = dest_wb.create_sheet(title=sheet_name)

            for row in source_sheet.iter_rows():
                for cell in row:
                    dest_cell = dest_sheet.cell(row=cell.row, column=cell.column)
                    dest_cell.value = cell.value
                    if cell.has_style:
                        dest_cell.font = copy(cell.font)
                        dest_cell.border = copy(cell.border)
                        dest_cell.fill = copy(cell.fill)
                        dest_cell.number_format = cell.number_format
                        dest_cell.protection = copy(cell.protection)
                        dest_cell.alignment = copy(cell.alignment)

        # A bare file name has an empty dirname, and os.makedirs('') raises;
        # in that case the file is saved relative to the current directory.
        destination_dir = os.path.dirname(destination_path)
        if destination_dir:
            os.makedirs(destination_dir, exist_ok=True)
            logging.debug(f"Destination directory ensured: {destination_dir}")

        dest_wb.save(destination_path)
        logging.info(f"Saved copied file to: {destination_path}")
    finally:
        # Close both workbooks even when copying or saving fails.
        dest_wb.close()
        source_wb.close()
|
||||||
|
|
||||||
|
def _strip_protection_markup(part_name, data):
    """Return *data* with every ``PROTECTION_PATTERNS`` match removed.

    The bytes are returned unchanged when they are not valid UTF-8 (so no
    content is silently dropped) or when no pattern matches.
    """
    try:
        text = data.decode('utf-8')
    except UnicodeDecodeError:
        logging.warning(f"Leaving non-UTF-8 XML part unchanged: {part_name}")
        return data

    cleaned = text
    for pattern in PROTECTION_PATTERNS:
        cleaned = re.sub(pattern, "", cleaned, flags=re.DOTALL)
    return data if cleaned == text else cleaned.encode('utf-8')


def remove_all_protection_tags(docx_path, output_path):
    """Write a copy of a Word archive with protection elements removed.

    Every member of the zip archive at *docx_path* is copied to a new archive
    at *output_path*. Members whose name ends in ``.xml`` have all matches of
    ``PROTECTION_PATTERNS`` deleted; all other members are copied as-is.

    Args:
        docx_path: Path of the source ``.docx``/``.docm`` archive.
        output_path: Path of the archive to create; its parent directory is
            created first when the path has one.
    """
    logging.info(f"Processing Word document: {docx_path}")

    # The directory must exist before the output archive is opened for
    # writing; a bare file name has no directory part to create.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
        logging.debug(f"Output directory ensured: {output_dir}")

    with zipfile.ZipFile(docx_path, 'r') as zip_in, \
            zipfile.ZipFile(output_path, 'w', compression=zipfile.ZIP_DEFLATED) as zip_out:
        for item in tqdm(zip_in.namelist(), desc="Processing XML files"):
            data = zip_in.read(item)
            if item.lower().endswith('.xml'):
                data = _strip_protection_markup(item, data)
            zip_out.writestr(item, data)

    logging.info(f"Saved cleaned file to: {output_path}")
|
||||||
|
|
||||||
|
def _find_files(source_dir, extensions):
    """Return paths of all files under *source_dir* ending in one of *extensions* (case-insensitive)."""
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(source_dir)
        for name in names
        if name.lower().endswith(extensions)
    ]


def _prompt_directories(source_prompt, dest_prompt):
    """Ask for source and destination folders; return them, or None if unusable."""
    source_dir = input(source_prompt).strip()
    dest_dir = input(dest_prompt).strip()
    if not os.path.isdir(source_dir):
        # os.walk() yields nothing for a missing folder, which would otherwise
        # look like a successful run that processed zero files.
        logging.error(f"Source folder does not exist: {source_dir}")
        return None
    if not dest_dir:
        logging.error("Destination folder must not be empty.")
        return None
    os.makedirs(dest_dir, exist_ok=True)
    return source_dir, dest_dir


def _prompt_passwords():
    """Ask how passwords are supplied and return the list of candidates (possibly empty)."""
    password_option = input("Choose password option (file/single/none): ").strip().lower()
    if password_option == 'file':
        password_file = input("Enter the path to the password file: ").strip()
        with open(password_file, 'r', encoding='utf-8') as pf:
            return [line.strip() for line in pf if line.strip()]
    if password_option == 'single':
        return [input("Enter the password: ").strip()]
    if password_option != 'none':
        logging.warning(
            f"Unknown password option '{password_option}'; continuing without passwords."
        )
    return []


def _process_tree(source_dir, dest_dir, extensions, description, worker, failure_verb):
    """Run *worker(source, destination)* for each matching file, mirroring the folder layout."""
    matches = _find_files(source_dir, extensions)
    if not matches:
        logging.warning(f"No files ending in {extensions} found in: {source_dir}")
        return

    for source_path in tqdm(matches, desc=description):
        dest_path = os.path.join(dest_dir, os.path.relpath(source_path, source_dir))
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        logging.debug(f"Ensured destination path: {dest_path}")

        try:
            worker(source_path, dest_path)
        except Exception as e:
            # One bad file must not abort the whole batch.
            logging.error(f"Failed to {failure_verb} {source_path}: {e}")


def main():
    """Interactive console entry point.

    Prompts for the file type (Excel or Word), the source and destination
    folders and, for Excel, the password option. It then processes every
    matching file under the source folder into the same relative location
    under the destination folder. Per-file failures are logged and skipped.
    """
    setup_logging()
    print("\nChoose the file type to process:")
    print("1. Excel files")
    print("2. Word documents")
    choice = input("Enter your choice (1 or 2): ").strip()

    if choice == '1':
        directories = _prompt_directories(
            "Enter the source folder with Excel files: ",
            "Enter the destination folder for copied files: ",
        )
        if directories is None:
            return
        source_dir, dest_dir = directories
        passwords = _prompt_passwords()
        _process_tree(
            source_dir,
            dest_dir,
            ('.xlsx', '.xlsm'),
            "Copying Excel Files",
            lambda src, dst: copy_excel_file(src, dst, passwords),
            "copy",
        )

    elif choice == '2':
        directories = _prompt_directories(
            "Enter the source folder with Word files: ",
            "Enter the destination folder for cleaned files: ",
        )
        if directories is None:
            return
        source_dir, dest_dir = directories
        _process_tree(
            source_dir,
            dest_dir,
            ('.docx', '.docm'),
            "Removing Word Protections",
            remove_all_protection_tags,
            "clean",
        )

    else:
        logging.error("Invalid choice. Please restart the script and choose a valid option.")


if __name__ == "__main__":
    main()
|
||||||
@ -2,6 +2,8 @@ import streamlit as st
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import warnings
|
import warnings
|
||||||
|
import shutil
|
||||||
|
from io import BytesIO
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import tempfile
|
import tempfile
|
||||||
import zipfile
|
import zipfile
|
||||||
@ -27,12 +29,53 @@ st.set_page_config(
|
|||||||
setup_logging()
|
setup_logging()
|
||||||
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl.reader.workbook')
|
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl.reader.workbook')
|
||||||
|
|
||||||
|
def add_to_error_log(error_dict, filepath, error):
    """Record *error* in *error_dict* under the key *filepath*.

    The error is stored as its ``str()`` form; an existing entry for the same
    key is overwritten.
    """
    message = str(error)
    error_dict[filepath] = message
|
||||||
|
|
||||||
def save_uploaded_file(uploaded_file):
    """Write an uploaded file's bytes to a new temporary file and return its path.

    The temporary file keeps the extension of ``uploaded_file.name`` and is
    created with ``delete=False``, so removing it is left to the caller.
    """
    _, extension = os.path.splitext(uploaded_file.name)
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
    with tmp_file:
        tmp_file.write(uploaded_file.getvalue())
    return tmp_file.name
|
||||||
|
|
||||||
|
def get_file_path(title="Select File", file_types=None):
    """Ask the user to pick a file with a native dialog when one is available.

    Uses tkinter on Windows, ``zenity`` on Linux and ``osascript`` on macOS.

    Args:
        title: Dialog title / prompt text.
        file_types: Sequence of ``(label, pattern)`` pairs; defaults to
            ``[("Text Files", "*.txt")]``. Applied on Windows and Linux; the
            macOS dialog is not filtered.

    Returns:
        The selected path as a string, or ``None`` when nothing was selected,
        the helper program is missing, the platform is not one of the three
        above, or an error occurred (the error is reported with ``st.error``).
    """
    # Imported locally so the function does not depend on file-level imports.
    import platform
    import subprocess

    # None sentinel instead of a mutable list default shared between calls.
    if file_types is None:
        file_types = [("Text Files", "*.txt")]

    try:
        system = platform.system()

        if system == "Windows":
            import tkinter as tk
            from tkinter import filedialog
            root = tk.Tk()
            try:
                root.withdraw()  # Hide the main window
                root.attributes('-topmost', True)  # Bring dialog to front
                path = filedialog.askopenfilename(title=title, filetypes=file_types)
            finally:
                # Without this every call leaves a hidden Tk root alive.
                root.destroy()
            return path if path else None

        if system == "Linux":
            command = ['zenity', '--file-selection', '--title', title]
            for label, patterns in file_types:
                if not isinstance(patterns, str):
                    patterns = " ".join(patterns)
                command.append(f'--file-filter={label} | {patterns}')
            try:
                result = subprocess.run(command, capture_output=True, text=True)
            except FileNotFoundError:
                return None  # zenity is not installed
            if result.returncode != 0:
                return None
            return result.stdout.strip() or None

        if system == "Darwin":  # macOS
            # Escape characters that would terminate the AppleScript string.
            safe_title = title.replace('\\', '\\\\').replace('"', '\\"')
            # "choose file" yields an alias; ask for its POSIX path so the
            # result can be passed to open()/os.path like on other platforms.
            script = f'POSIX path of (choose file with prompt "{safe_title}")'
            try:
                result = subprocess.run(
                    ['osascript', '-e', script],
                    capture_output=True,
                    text=True
                )
            except FileNotFoundError:
                return None
            if result.returncode != 0:
                return None
            return result.stdout.strip() or None
    except Exception as e:
        st.error(f"Error opening file dialog: {str(e)}")
        return None
    return None
|
||||||
|
|
||||||
def get_directory_path(title="Select Directory"):
|
def get_directory_path(title="Select Directory"):
|
||||||
"""Get directory path using native file dialog when possible"""
|
"""Get directory path using native file dialog when possible"""
|
||||||
try:
|
try:
|
||||||
@ -91,28 +134,49 @@ with st.sidebar:
|
|||||||
help="Select the type of files you want to process"
|
help="Select the type of files you want to process"
|
||||||
)
|
)
|
||||||
|
|
||||||
processing_mode = st.radio(
|
|
||||||
"Choose processing mode:",
|
|
||||||
("File Upload", "Directory Processing"),
|
|
||||||
help="Process uploaded files or select directories"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Main content area
|
# Main content area
|
||||||
if processing_mode == "File Upload":
|
st.header(f"{file_type} File Processing")
|
||||||
if file_type == "Excel":
|
|
||||||
st.header("Excel File Processing")
|
|
||||||
|
|
||||||
col1, col2 = st.columns(2)
|
col1, col2 = st.columns(2)
|
||||||
|
|
||||||
with col1:
|
with col1:
|
||||||
uploaded_files = st.file_uploader(
|
# Input method selection
|
||||||
"Upload Excel files",
|
input_method = st.radio(
|
||||||
type=["xlsx", "xlsm"],
|
"Choose input method:",
|
||||||
accept_multiple_files=True,
|
("Upload Files", "Select Directory"),
|
||||||
help="You can upload multiple Excel files"
|
help="Upload files directly or process from local directory"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if input_method == "Upload Files":
|
||||||
|
uploaded_files = st.file_uploader(
|
||||||
|
f"Upload {file_type} files",
|
||||||
|
type=["xlsx", "xlsm"] if file_type == "Excel" else ["docx", "docm"],
|
||||||
|
accept_multiple_files=True,
|
||||||
|
help=f"You can upload multiple {file_type} files"
|
||||||
|
)
|
||||||
|
else: # Select Directory
|
||||||
|
source_dir = st.text_input("Source Directory Path",
|
||||||
|
value=st.session_state.get('source_dir', ''),
|
||||||
|
help="Enter the full path to the directory containing files to process")
|
||||||
|
source_browse = st.button("Browse Source Directory")
|
||||||
|
if source_browse:
|
||||||
|
path = get_directory_path("Select Source Directory")
|
||||||
|
if path:
|
||||||
|
st.session_state['source_dir'] = path
|
||||||
|
st.experimental_rerun()
|
||||||
|
|
||||||
|
dest_dir = st.text_input("Destination Directory Path",
|
||||||
|
value=st.session_state.get('dest_dir', ''),
|
||||||
|
help="Enter the full path where processed files will be saved")
|
||||||
|
dest_browse = st.button("Browse Destination Directory")
|
||||||
|
if dest_browse:
|
||||||
|
path = get_directory_path("Select Destination Directory")
|
||||||
|
if path:
|
||||||
|
st.session_state['dest_dir'] = path
|
||||||
|
st.experimental_rerun()
|
||||||
|
|
||||||
with col2:
|
with col2:
|
||||||
|
if file_type == "Excel":
|
||||||
password_option = st.radio(
|
password_option = st.radio(
|
||||||
"Password Option:",
|
"Password Option:",
|
||||||
("No Password", "Single Password", "Password File")
|
("No Password", "Single Password", "Password File")
|
||||||
@ -124,54 +188,53 @@ if processing_mode == "File Upload":
|
|||||||
if password:
|
if password:
|
||||||
passwords = [password]
|
passwords = [password]
|
||||||
elif password_option == "Password File":
|
elif password_option == "Password File":
|
||||||
|
if input_method == "Upload Files":
|
||||||
password_file = st.file_uploader("Upload password file", type=["txt"])
|
password_file = st.file_uploader("Upload password file", type=["txt"])
|
||||||
if password_file:
|
if password_file:
|
||||||
content = password_file.getvalue().decode()
|
content = password_file.getvalue().decode()
|
||||||
passwords = [line.strip() for line in content.splitlines() if line.strip()]
|
passwords = [line.strip() for line in content.splitlines() if line.strip()]
|
||||||
st.info(f"Loaded {len(passwords)} passwords")
|
st.info(f"Loaded {len(passwords)} passwords")
|
||||||
|
else: # Select Directory
|
||||||
|
password_path = st.text_input("Password File Path",
|
||||||
|
help="Enter the full path to the text file containing passwords",
|
||||||
|
value=st.session_state.get('password_path', ''))
|
||||||
|
password_browse = st.button("Browse Password File")
|
||||||
|
if password_browse:
|
||||||
|
path = get_file_path("Select Password File", [("Text Files", "*.txt")])
|
||||||
|
if path:
|
||||||
|
st.session_state['password_path'] = path
|
||||||
|
st.experimental_rerun()
|
||||||
|
|
||||||
else: # Word
|
if password_path and os.path.exists(password_path):
|
||||||
st.header("Word Document Processing")
|
with open(password_path, 'r', encoding='utf-8') as pf:
|
||||||
uploaded_files = st.file_uploader(
|
passwords = [line.strip() for line in pf if line.strip()]
|
||||||
"Upload Word files",
|
st.info(f"Loaded {len(passwords)} passwords from file")
|
||||||
type=["docx", "docm"],
|
|
||||||
accept_multiple_files=True,
|
|
||||||
help="You can upload multiple Word files"
|
|
||||||
)
|
|
||||||
|
|
||||||
if uploaded_files:
|
# Process button and logic
|
||||||
if st.button(f"Process {file_type} Files", type="primary"):
|
if input_method == "Upload Files" and uploaded_files and st.button("Process Files", type="primary"):
|
||||||
progress_bar = st.progress(0)
|
progress_bar = st.progress(0)
|
||||||
status_text = st.empty()
|
status_text = st.empty()
|
||||||
|
|
||||||
# Dictionary to store processed files for zip download
|
|
||||||
processed_files = {}
|
processed_files = {}
|
||||||
|
|
||||||
for idx, uploaded_file in enumerate(uploaded_files):
|
for idx, uploaded_file in enumerate(uploaded_files):
|
||||||
try:
|
try:
|
||||||
# Create a container for each file
|
|
||||||
with st.expander(f"Processing {uploaded_file.name}", expanded=True):
|
with st.expander(f"Processing {uploaded_file.name}", expanded=True):
|
||||||
st.write(f"📝 Processing {uploaded_file.name}...")
|
st.write(f"📝 Processing {uploaded_file.name}...")
|
||||||
|
|
||||||
# Save uploaded file temporarily
|
|
||||||
temp_input_path = save_uploaded_file(uploaded_file)
|
temp_input_path = save_uploaded_file(uploaded_file)
|
||||||
temp_output_path = os.path.join(
|
temp_output_path = f"{temp_input_path}_processed"
|
||||||
os.path.dirname(temp_input_path),
|
|
||||||
f"processed_{os.path.basename(temp_input_path)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Process based on file type
|
|
||||||
if file_type == "Excel":
|
if file_type == "Excel":
|
||||||
copy_excel_file(temp_input_path, temp_output_path, passwords)
|
copy_excel_file(temp_input_path, temp_output_path, passwords)
|
||||||
else: # Word
|
else: # Word
|
||||||
remove_all_protection_tags(temp_input_path, temp_output_path)
|
remove_all_protection_tags(temp_input_path, temp_output_path)
|
||||||
|
|
||||||
# Provide download button
|
|
||||||
with open(temp_output_path, "rb") as f:
|
with open(temp_output_path, "rb") as f:
|
||||||
processed_file = f.read()
|
processed_file = f.read()
|
||||||
processed_files[f"processed_{uploaded_file.name}"] = processed_file
|
processed_files[f"processed_{uploaded_file.name}"] = processed_file
|
||||||
|
|
||||||
mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if file_type == "Excel" else "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
mime_type = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
|
if file_type == "Excel" else
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||||
|
|
||||||
st.download_button(
|
st.download_button(
|
||||||
label=f"⬇️ Download processed file",
|
label=f"⬇️ Download processed file",
|
||||||
@ -180,24 +243,19 @@ if uploaded_files:
|
|||||||
mime=mime_type
|
mime=mime_type
|
||||||
)
|
)
|
||||||
|
|
||||||
# Cleanup temporary files
|
|
||||||
os.unlink(temp_input_path)
|
os.unlink(temp_input_path)
|
||||||
os.unlink(temp_output_path)
|
os.unlink(temp_output_path)
|
||||||
|
|
||||||
st.success("✅ Processing complete!")
|
st.success("✅ Processing complete!")
|
||||||
|
|
||||||
# Update progress
|
|
||||||
progress = (idx + 1) / len(uploaded_files)
|
progress = (idx + 1) / len(uploaded_files)
|
||||||
progress_bar.progress(progress)
|
progress_bar.progress(progress)
|
||||||
status_text.text(f"Processed {idx + 1} of {len(uploaded_files)} files")
|
status_text.text(f"Processed {idx + 1} of {len(uploaded_files)} files")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
st.error(f"❌ Error processing {uploaded_file.name}: {str(e)}")
|
error_msg = f"❌ Error processing {uploaded_file.name}: {str(e)}"
|
||||||
|
st.error(error_msg)
|
||||||
|
st.session_state.error_log[uploaded_file.name] = str(e)
|
||||||
|
|
||||||
progress_bar.empty()
|
|
||||||
status_text.text("✨ All processing complete!")
|
|
||||||
|
|
||||||
# Add zip download button if multiple files were processed
|
|
||||||
if len(processed_files) > 1:
|
if len(processed_files) > 1:
|
||||||
zip_buffer = create_zip_file(processed_files)
|
zip_buffer = create_zip_file(processed_files)
|
||||||
st.download_button(
|
st.download_button(
|
||||||
@ -207,64 +265,7 @@ if uploaded_files:
|
|||||||
mime="application/zip",
|
mime="application/zip",
|
||||||
)
|
)
|
||||||
|
|
||||||
else: # Directory Processing
|
elif input_method == "Select Directory" and source_dir and dest_dir and st.button("Process Directory", type="primary"):
|
||||||
st.header("Directory Processing")
|
|
||||||
|
|
||||||
col1, col2 = st.columns(2)
|
|
||||||
|
|
||||||
with col1:
|
|
||||||
source_dir = st.text_input("Source Directory Path",
|
|
||||||
help="Enter the full path to the directory containing files to process")
|
|
||||||
source_browse = st.button("Browse Source Directory")
|
|
||||||
if source_browse:
|
|
||||||
path = get_directory_path("Select Source Directory")
|
|
||||||
if path:
|
|
||||||
source_dir = path
|
|
||||||
st.session_state['source_dir'] = path # Persist the selection
|
|
||||||
|
|
||||||
dest_dir = st.text_input("Destination Directory Path",
|
|
||||||
help="Enter the full path where processed files will be saved")
|
|
||||||
dest_browse = st.button("Browse Destination Directory")
|
|
||||||
if dest_browse:
|
|
||||||
path = get_directory_path("Select Destination Directory")
|
|
||||||
if path:
|
|
||||||
dest_dir = path
|
|
||||||
st.session_state['dest_dir'] = path # Persist the selection
|
|
||||||
|
|
||||||
with col2:
|
|
||||||
if file_type == "Excel":
|
|
||||||
password_option = st.radio(
|
|
||||||
"Password Option:",
|
|
||||||
("No Password", "Password File")
|
|
||||||
)
|
|
||||||
|
|
||||||
passwords = []
|
|
||||||
if password_option == "Password File":
|
|
||||||
password_path = st.text_input("Password File Path",
|
|
||||||
help="Enter the full path to the text file containing passwords")
|
|
||||||
password_browse = st.button("Browse Password File")
|
|
||||||
if password_browse:
|
|
||||||
try:
|
|
||||||
import tkinter as tk
|
|
||||||
from tkinter import filedialog
|
|
||||||
root = tk.Tk()
|
|
||||||
root.withdraw()
|
|
||||||
file_path = filedialog.askopenfilename(
|
|
||||||
title="Select Password File",
|
|
||||||
filetypes=[("Text Files", "*.txt")]
|
|
||||||
)
|
|
||||||
if file_path:
|
|
||||||
password_path = file_path
|
|
||||||
st.session_state['password_path'] = file_path
|
|
||||||
except Exception as e:
|
|
||||||
st.error(f"Error opening file dialog: {str(e)}")
|
|
||||||
|
|
||||||
if password_path and os.path.exists(password_path):
|
|
||||||
with open(password_path, 'r', encoding='utf-8') as pf:
|
|
||||||
passwords = [line.strip() for line in pf if line.strip()]
|
|
||||||
st.info(f"Loaded {len(passwords)} passwords from file")
|
|
||||||
|
|
||||||
if source_dir and dest_dir and st.button("Process Directory", type="primary"):
|
|
||||||
if not os.path.exists(source_dir):
|
if not os.path.exists(source_dir):
|
||||||
st.error(f"Source directory does not exist: {source_dir}")
|
st.error(f"Source directory does not exist: {source_dir}")
|
||||||
elif not os.path.exists(os.path.dirname(dest_dir)):
|
elif not os.path.exists(os.path.dirname(dest_dir)):
|
||||||
@ -273,32 +274,39 @@ if uploaded_files:
|
|||||||
os.makedirs(dest_dir, exist_ok=True)
|
os.makedirs(dest_dir, exist_ok=True)
|
||||||
|
|
||||||
# Get all files recursively
|
# Get all files recursively
|
||||||
if file_type == "Excel":
|
all_files = []
|
||||||
files = glob.glob(os.path.join(source_dir, "**/*.xlsx"), recursive=True)
|
files_to_process = []
|
||||||
files.extend(glob.glob(os.path.join(source_dir, "**/*.xlsm"), recursive=True))
|
|
||||||
else: # Word
|
|
||||||
files = glob.glob(os.path.join(source_dir, "**/*.docx"), recursive=True)
|
|
||||||
files.extend(glob.glob(os.path.join(source_dir, "**/*.docm"), recursive=True))
|
|
||||||
|
|
||||||
if not files:
|
for root, _, files in os.walk(source_dir):
|
||||||
st.warning(f"No {file_type} files found in the source directory")
|
for file in files:
|
||||||
|
full_path = os.path.join(root, file)
|
||||||
|
file_lower = file.lower()
|
||||||
|
|
||||||
|
if file_type == "Excel" and file_lower.endswith(('.xlsx', '.xlsm')):
|
||||||
|
files_to_process.append(full_path)
|
||||||
|
elif file_type == "Word" and file_lower.endswith(('.docx', '.docm')):
|
||||||
|
files_to_process.append(full_path)
|
||||||
else:
|
else:
|
||||||
|
all_files.append(full_path)
|
||||||
|
|
||||||
|
if not files_to_process:
|
||||||
|
st.warning(f"No {file_type} files found in the source directory")
|
||||||
|
|
||||||
|
total_files = len(files_to_process) + len(all_files)
|
||||||
progress_bar = st.progress(0)
|
progress_bar = st.progress(0)
|
||||||
status_text = st.empty()
|
status_text = st.empty()
|
||||||
|
files_processed = 0
|
||||||
|
|
||||||
for idx, source_path in enumerate(files):
|
# Process Office files
|
||||||
|
for source_path in files_to_process:
|
||||||
try:
|
try:
|
||||||
# Create a container for each file
|
|
||||||
relative_path = os.path.relpath(source_path, source_dir)
|
relative_path = os.path.relpath(source_path, source_dir)
|
||||||
dest_path = os.path.join(dest_dir, relative_path)
|
dest_path = os.path.join(dest_dir, relative_path)
|
||||||
|
|
||||||
with st.expander(f"Processing {relative_path}", expanded=True):
|
with st.expander(f"Processing {relative_path}", expanded=True):
|
||||||
st.write(f"📝 Processing {relative_path}...")
|
st.write(f"📝 Processing {relative_path}...")
|
||||||
|
|
||||||
# Create destination directory if needed
|
|
||||||
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
|
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
|
||||||
|
|
||||||
# Process based on file type
|
|
||||||
if file_type == "Excel":
|
if file_type == "Excel":
|
||||||
copy_excel_file(source_path, dest_path, passwords)
|
copy_excel_file(source_path, dest_path, passwords)
|
||||||
else: # Word
|
else: # Word
|
||||||
@ -306,36 +314,80 @@ if uploaded_files:
|
|||||||
|
|
||||||
st.success("✅ Processing complete!")
|
st.success("✅ Processing complete!")
|
||||||
|
|
||||||
# Update progress
|
files_processed += 1
|
||||||
progress = (idx + 1) / len(files)
|
progress = files_processed / total_files
|
||||||
progress_bar.progress(progress)
|
progress_bar.progress(progress)
|
||||||
status_text.text(f"Processed {idx + 1} of {len(files)} files")
|
status_text.text(f"Processed {files_processed} of {total_files} files")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
st.error(f"❌ Error processing {relative_path}: {str(e)}")
|
error_msg = f"❌ Error processing {relative_path}: {str(e)}"
|
||||||
|
st.error(error_msg)
|
||||||
|
st.session_state.error_log[source_path] = str(e)
|
||||||
|
|
||||||
progress_bar.empty()
|
# Copy all other files
|
||||||
status_text.text("✨ All processing complete!")
|
with st.expander("Copying other files", expanded=True):
|
||||||
|
for source_path in all_files:
|
||||||
|
try:
|
||||||
|
relative_path = os.path.relpath(source_path, source_dir)
|
||||||
|
dest_path = os.path.join(dest_dir, relative_path)
|
||||||
|
|
||||||
# Show the output directory
|
# Create destination directory if it doesn't exist
|
||||||
st.success(f"Processed files are saved in: {dest_dir}")
|
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
|
||||||
|
|
||||||
|
# Copy the file
|
||||||
|
import shutil
|
||||||
|
shutil.copy2(source_path, dest_path)
|
||||||
|
|
||||||
|
files_processed += 1
|
||||||
|
progress = files_processed / total_files
|
||||||
|
progress_bar.progress(progress)
|
||||||
|
status_text.text(f"Processed {files_processed} of {total_files} files")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
error_msg = f"❌ Error copying {relative_path}: {str(e)}"
|
||||||
|
st.error(error_msg)
|
||||||
|
st.session_state.error_log[source_path] = str(e)
|
||||||
|
|
||||||
|
st.success(f"✨ All files processed and saved to: {dest_dir}")
|
||||||
|
if len(all_files) > 0:
|
||||||
|
st.info(f"📁 Copied {len(all_files)} additional files to maintain folder structure")
|
||||||
|
|
||||||
|
# Error Log Section
|
||||||
|
st.markdown("---")
|
||||||
|
st.header("Error Log")
|
||||||
|
|
||||||
|
if 'error_log' not in st.session_state:
|
||||||
|
st.session_state.error_log = {}
|
||||||
|
|
||||||
|
# Display error log if there are errors
|
||||||
|
if st.session_state.error_log:
|
||||||
|
st.error("The following errors were encountered:")
|
||||||
|
error_text = "\n\n".join([f"File: {path}\nError: {error}" for path, error in st.session_state.error_log.items()])
|
||||||
|
st.text_area("Error Details", error_text, height=200)
|
||||||
|
|
||||||
|
# Add copy button
|
||||||
|
if st.button("Copy Error Log"):
|
||||||
|
st.write("Error log copied to clipboard!")
|
||||||
|
st.session_state.error_log_copied = error_text
|
||||||
|
|
||||||
|
# Add clear button
|
||||||
|
if st.button("Clear Error Log"):
|
||||||
|
st.session_state.error_log = {}
|
||||||
|
st.experimental_rerun()
|
||||||
|
else:
|
||||||
|
st.success("No errors encountered in current session")
|
||||||
|
|
||||||
# Footer
|
# Footer
|
||||||
st.sidebar.markdown("---")
|
st.sidebar.markdown("---")
|
||||||
st.sidebar.markdown("### Instructions")
|
st.sidebar.markdown("### Instructions")
|
||||||
st.sidebar.markdown("""
|
st.sidebar.markdown("""
|
||||||
1. Choose processing mode:
|
1. Select file type (Excel or Word)
|
||||||
- File Upload: Process files via web upload
|
2. Choose input method:
|
||||||
- Directory Processing: Process files from local directories
|
- Upload Files: Process files via web upload
|
||||||
2. Select file type (Excel or Word)
|
- Select Directory: Process files from local directories
|
||||||
3. For File Upload:
|
3. For Excel files, set password options if needed
|
||||||
- Upload your files
|
4. Click Process button
|
||||||
- Set password options if needed
|
5. Monitor progress and download processed files
|
||||||
4. For Directory Processing:
|
|
||||||
- Enter source and destination paths
|
|
||||||
- Provide password file path if needed
|
|
||||||
5. Click Process button
|
|
||||||
6. Monitor progress and check results
|
|
||||||
""")
|
""")
|
||||||
|
|
||||||
st.sidebar.markdown("---")
|
st.sidebar.markdown("---")
|
||||||
@ -345,5 +397,11 @@ This tool helps you remove protection from:
|
|||||||
- Excel workbooks (.xlsx, .xlsm)
|
- Excel workbooks (.xlsx, .xlsm)
|
||||||
- Word documents (.docx, .docm)
|
- Word documents (.docx, .docm)
|
||||||
|
|
||||||
|
Upload Limits:
|
||||||
|
- Individual files: up to 200MB each
|
||||||
|
- Total upload size: up to 800MB per session
|
||||||
|
|
||||||
|
For larger files or bulk processing, use the 'Select Directory' option to process files locally.
|
||||||
|
|
||||||
No files are stored on the server - all processing happens in your browser!
|
No files are stored on the server - all processing happens in your browser!
|
||||||
""")
|
""")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user