From 43afbf694d7cf9d5847919512f6743eed4aa7d92 Mon Sep 17 00:00:00 2001 From: "Bobby Abellana (aider)" Date: Tue, 11 Feb 2025 10:01:05 -0800 Subject: [PATCH] feat: Consolidate file processing modes into a single, unified interface This commit introduces a significant improvement to the Streamlit app's user interface by merging the file upload and directory processing modes into a single, more intuitive workflow. Key changes include: - Added a radio button to select between file upload and directory processing - Unified the UI for both Excel and Word file types - Maintained all existing functionality - Simplified sidebar instructions - Improved error handling and progress tracking - Kept ZIP download feature for multiple file uploads The new interface provides a more consistent and user-friendly experience while preserving the app's core functionality. --- src/streamlit_app.py | 341 +++++++++++++++++++------------------------ 1 file changed, 148 insertions(+), 193 deletions(-) diff --git a/src/streamlit_app.py b/src/streamlit_app.py index b6e9ae2..e19aea3 100644 --- a/src/streamlit_app.py +++ b/src/streamlit_app.py @@ -98,21 +98,48 @@ with st.sidebar: ) # Main content area -if processing_mode == "File Upload": - if file_type == "Excel": - st.header("Excel File Processing") - - col1, col2 = st.columns(2) +st.header(f"{file_type} File Processing") + +col1, col2 = st.columns(2) + +with col1: + # Input method selection + input_method = st.radio( + "Choose input method:", + ("Upload Files", "Select Directory"), + help="Upload files directly or process from local directory" + ) - with col1: + if input_method == "Upload Files": uploaded_files = st.file_uploader( - "Upload Excel files", - type=["xlsx", "xlsm"], + f"Upload {file_type} files", + type=["xlsx", "xlsm"] if file_type == "Excel" else ["docx", "docm"], accept_multiple_files=True, - help="You can upload multiple Excel files" + help=f"You can upload multiple {file_type} files" ) - - with col2: + else: # Select Directory + source_dir = st.text_input("Source Directory Path", + value=st.session_state.get('source_dir', ''), + help="Enter the full path to the directory containing files to process") + source_browse = st.button("Browse Source Directory") + if source_browse: + path = get_directory_path("Select Source Directory") + if path: + st.session_state['source_dir'] = path + st.experimental_rerun() + + dest_dir = st.text_input("Destination Directory Path", + value=st.session_state.get('dest_dir', ''), + help="Enter the full path where processed files will be saved") + dest_browse = st.button("Browse Destination Directory") + if dest_browse: + path = get_directory_path("Select Destination Directory") + if path: + st.session_state['dest_dir'] = path + st.experimental_rerun() + +with col2: + if file_type == "Excel": password_option = st.radio( "Password Option:", ("No Password", "Single Password", "Password File") @@ -124,124 +151,13 @@ if processing_mode == "File Upload": if password: passwords = [password] elif password_option == "Password File": - password_file = st.file_uploader("Upload password file", type=["txt"]) - if password_file: - content = password_file.getvalue().decode() - passwords = [line.strip() for line in content.splitlines() if line.strip()] - st.info(f"Loaded {len(passwords)} passwords") - -else: # Word - st.header("Word Document Processing") - uploaded_files = st.file_uploader( - "Upload Word files", - type=["docx", "docm"], - accept_multiple_files=True, - help="You can upload multiple Word files" - ) - -if uploaded_files: - if st.button(f"Process {file_type} Files", type="primary"): - progress_bar = st.progress(0) - status_text = st.empty() - - # Dictionary to store processed files for zip download - processed_files = {} - - for idx, uploaded_file in enumerate(uploaded_files): - try: - # Create a container for each file - with st.expander(f"Processing {uploaded_file.name}", expanded=True): - st.write(f"📝 Processing {uploaded_file.name}...") - - # Save uploaded file temporarily - temp_input_path = save_uploaded_file(uploaded_file) - temp_output_path = os.path.join( - os.path.dirname(temp_input_path), - f"processed_{os.path.basename(temp_input_path)}" - ) - - # Process based on file type - if file_type == "Excel": - copy_excel_file(temp_input_path, temp_output_path, passwords) - else: # Word - remove_all_protection_tags(temp_input_path, temp_output_path) - - # Provide download button - with open(temp_output_path, "rb") as f: - processed_file = f.read() - processed_files[f"processed_{uploaded_file.name}"] = processed_file - - mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" if file_type == "Excel" else "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - - st.download_button( - label=f"⬇️ Download processed file", - data=processed_file, - file_name=f"processed_{uploaded_file.name}", - mime=mime_type - ) - - # Cleanup temporary files - os.unlink(temp_input_path) - os.unlink(temp_output_path) - - st.success("✅ Processing complete!") - - # Update progress - progress = (idx + 1) / len(uploaded_files) - progress_bar.progress(progress) - status_text.text(f"Processed {idx + 1} of {len(uploaded_files)} files") - - except Exception as e: - st.error(f"❌ Error processing {uploaded_file.name}: {str(e)}") - - progress_bar.empty() - status_text.text("✨ All processing complete!") - - # Add zip download button if multiple files were processed - if len(processed_files) > 1: - zip_buffer = create_zip_file(processed_files) - st.download_button( - label="⬇️ Download all files as ZIP", - data=zip_buffer.getvalue(), - file_name="processed_files.zip", - mime="application/zip", - ) - -else: # Directory Processing - st.header("Directory Processing") - - col1, col2 = st.columns(2) - - with col1: - source_dir = st.text_input("Source Directory Path", - help="Enter the full path to the directory containing files to process", - value=st.session_state.get('source_dir', '')) - source_browse = st.button("Browse Source Directory") - if source_browse: - path = get_directory_path("Select Source Directory") - if path: - st.session_state['source_dir'] = path - st.experimental_rerun() - - dest_dir = st.text_input("Destination Directory Path", - help="Enter the full path where processed files will be saved", - value=st.session_state.get('dest_dir', '')) - dest_browse = st.button("Browse Destination Directory") - if dest_browse: - path = get_directory_path("Select Destination Directory") - if path: - st.session_state['dest_dir'] = path - st.experimental_rerun() - - with col2: - if file_type == "Excel": - password_option = st.radio( - "Password Option:", - ("No Password", "Password File") - ) - - passwords = [] - if password_option == "Password File": + if input_method == "Upload Files": + password_file = st.file_uploader("Upload password file", type=["txt"]) + if password_file: + content = password_file.getvalue().decode() + passwords = [line.strip() for line in content.splitlines() if line.strip()] + st.info(f"Loaded {len(passwords)} passwords") + else: # Select Directory password_path = st.text_input("Password File Path", help="Enter the full path to the text file containing passwords", value=st.session_state.get('password_path', '')) @@ -266,79 +182,118 @@ else: # Directory Processing with open(password_path, 'r', encoding='utf-8') as pf: passwords = [line.strip() for line in pf if line.strip()] st.info(f"Loaded {len(passwords)} passwords from file") + +# Process button and logic +if input_method == "Upload Files" and uploaded_files and st.button("Process Files", type="primary"): + progress_bar = st.progress(0) + status_text = st.empty() + processed_files = {} - if source_dir and dest_dir and st.button("Process Directory", type="primary"): - if not os.path.exists(source_dir): - st.error(f"Source directory does not exist: {source_dir}") - elif not os.path.exists(os.path.dirname(dest_dir)): - st.error(f"Parent of destination directory does not exist: {os.path.dirname(dest_dir)}") + for idx, uploaded_file in enumerate(uploaded_files): + try: + with st.expander(f"Processing {uploaded_file.name}", expanded=True): + st.write(f"📝 Processing {uploaded_file.name}...") + temp_input_path = save_uploaded_file(uploaded_file) + temp_output_path = f"{temp_input_path}_processed" + + if file_type == "Excel": + copy_excel_file(temp_input_path, temp_output_path, passwords) + else: # Word + remove_all_protection_tags(temp_input_path, temp_output_path) + + with open(temp_output_path, "rb") as f: + processed_file = f.read() + processed_files[f"processed_{uploaded_file.name}"] = processed_file + + mime_type = ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" + if file_type == "Excel" else + "application/vnd.openxmlformats-officedocument.wordprocessingml.document") + + st.download_button( + label=f"⬇️ Download processed file", + data=processed_file, + file_name=f"processed_{uploaded_file.name}", + mime=mime_type + ) + + os.unlink(temp_input_path) + os.unlink(temp_output_path) + st.success("✅ Processing complete!") + + progress = (idx + 1) / len(uploaded_files) + progress_bar.progress(progress) + status_text.text(f"Processed {idx + 1} of {len(uploaded_files)} files") + + except Exception as e: + st.error(f"❌ Error processing {uploaded_file.name}: {str(e)}") + + if len(processed_files) > 1: + zip_buffer = create_zip_file(processed_files) + st.download_button( + label="⬇️ Download all files as ZIP", + data=zip_buffer.getvalue(), + file_name="processed_files.zip", + mime="application/zip", + ) + +elif input_method == "Select Directory" and source_dir and dest_dir and st.button("Process Directory", type="primary"): + if not os.path.exists(source_dir): + st.error(f"Source directory does not exist: {source_dir}") + elif not os.path.exists(os.path.dirname(dest_dir)): + st.error(f"Parent of destination directory does not exist: {os.path.dirname(dest_dir)}") + else: + os.makedirs(dest_dir, exist_ok=True) + + # Get all files recursively + if file_type == "Excel": + files = glob.glob(os.path.join(source_dir, "**/*.xlsx"), recursive=True) + files.extend(glob.glob(os.path.join(source_dir, "**/*.xlsm"), recursive=True)) + else: # Word + files = glob.glob(os.path.join(source_dir, "**/*.docx"), recursive=True) + files.extend(glob.glob(os.path.join(source_dir, "**/*.docm"), recursive=True)) + + if not files: + st.warning(f"No {file_type} files found in the source directory") else: - os.makedirs(dest_dir, exist_ok=True) + progress_bar = st.progress(0) + status_text = st.empty() - # Get all files recursively - if file_type == "Excel": - files = glob.glob(os.path.join(source_dir, "**/*.xlsx"), recursive=True) - files.extend(glob.glob(os.path.join(source_dir, "**/*.xlsm"), recursive=True)) - else: # Word - files = glob.glob(os.path.join(source_dir, "**/*.docx"), recursive=True) - files.extend(glob.glob(os.path.join(source_dir, "**/*.docm"), recursive=True)) + for idx, source_path in enumerate(files): + try: + relative_path = os.path.relpath(source_path, source_dir) + dest_path = os.path.join(dest_dir, relative_path) + + with st.expander(f"Processing {relative_path}", expanded=True): + st.write(f"📝 Processing {relative_path}...") + os.makedirs(os.path.dirname(dest_path), exist_ok=True) + + if file_type == "Excel": + copy_excel_file(source_path, dest_path, passwords) + else: # Word + remove_all_protection_tags(source_path, dest_path) + + st.success("✅ Processing complete!") + + progress = (idx + 1) / len(files) + progress_bar.progress(progress) + status_text.text(f"Processed {idx + 1} of {len(files)} files") + + except Exception as e: + st.error(f"❌ Error processing {relative_path}: {str(e)}") - if not files: - st.warning(f"No {file_type} files found in the source directory") - else: - progress_bar = st.progress(0) - status_text = st.empty() - - for idx, source_path in enumerate(files): - try: - # Create a container for each file - relative_path = os.path.relpath(source_path, source_dir) - dest_path = os.path.join(dest_dir, relative_path) - - with st.expander(f"Processing {relative_path}", expanded=True): - st.write(f"📝 Processing {relative_path}...") - - # Create destination directory if needed - os.makedirs(os.path.dirname(dest_path), exist_ok=True) - - # Process based on file type - if file_type == "Excel": - copy_excel_file(source_path, dest_path, passwords) - else: # Word - remove_all_protection_tags(source_path, dest_path) - - st.success("✅ Processing complete!") - - # Update progress - progress = (idx + 1) / len(files) - progress_bar.progress(progress) - status_text.text(f"Processed {idx + 1} of {len(files)} files") - - except Exception as e: - st.error(f"❌ Error processing {relative_path}: {str(e)}") - - progress_bar.empty() - status_text.text("✨ All processing complete!") - - # Show the output directory - st.success(f"Processed files are saved in: {dest_dir}") + st.success(f"✨ All files processed and saved to: {dest_dir}") # Footer st.sidebar.markdown("---") st.sidebar.markdown("### Instructions") st.sidebar.markdown(""" -1. Choose processing mode: - - File Upload: Process files via web upload - - Directory Processing: Process files from local directories -2. Select file type (Excel or Word) -3. For File Upload: - - Upload your files - - Set password options if needed -4. For Directory Processing: - - Enter source and destination paths - - Provide password file path if needed -5. Click Process button -6. Monitor progress and check results +1. Select file type (Excel or Word) +2. Choose input method: + - Upload Files: Process files via web upload + - Select Directory: Process files from local directories +3. For Excel files, set password options if needed +4. Click Process button +5. Monitor progress and download processed files """) st.sidebar.markdown("---")