# Excel Filter Tool - Streamlit Docker Image
# Optimized for Coolify deployment

FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install system dependencies.
# curl is needed at runtime: the HEALTHCHECK below (and the healthcheck in
# docker-compose.yml) shells out to it, and the slim base image does not
# include it. Without it every probe fails and the container is reported
# as unhealthy.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY excel_filter/requirements.txt ./requirements.txt

# Install Python dependencies
RUN pip install --upgrade pip && \
    pip install -r requirements.txt && \
    pip install streamlit

# Copy application code
COPY excel_filter/ ./excel_filter/
COPY streamlit_app.py .
COPY .streamlit/ .streamlit/

# Create a non-root user for security
RUN useradd --create-home --shell /bin/bash appuser && \
    chown -R appuser:appuser /app
USER appuser

# Expose Streamlit port
EXPOSE 8501

# Health check (uses the curl binary installed above)
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl --fail http://localhost:8501/_stcore/health || exit 1

# Run Streamlit
ENTRYPOINT ["streamlit", "run", "streamlit_app.py"]
Push this repository to your Git server +2. In Coolify, create a new resource and select "Docker Compose" +3. Point to your repository +4. Deploy! + +### Option 2: Dockerfile + +1. In Coolify, create a new resource and select "Dockerfile" +2. Point to your repository +3. Set the port to `8501` +4. Deploy! + +### Environment Variables (Optional) + +No environment variables are required, but you can set: + +- `TZ` - Timezone (default: UTC) + +## Usage + +1. **Upload**: Drag and drop an Excel file or click to browse +2. **Select Sheet**: Choose the worksheet to filter +3. **Configure Filters**: + - **Regex Tab**: Enable regex filtering and enter a pattern + - **Numeric Tab**: Set up numeric comparisons + - **Columns Tab**: Select which columns to include +4. **Apply**: Click "Apply Filters" to process +5. **Download**: Click "Download Filtered File" to get results + +## File Structure + +``` +├── streamlit_app.py # Main Streamlit application +├── Dockerfile # Docker configuration +├── docker-compose.yml # Docker Compose configuration +├── .dockerignore # Docker ignore file +├── .streamlit/ +│ └── config.toml # Streamlit configuration +├── run_streamlit.bat # Windows runner script +├── run_streamlit.sh # Linux/Mac runner script +└── excel_filter/ # Core filter module + ├── filter.py # Main filter logic + ├── requirements.txt # Python dependencies + └── locales/ # Translations + ├── de.json + └── en.json +``` + +## Regex Examples + +| Pattern | Description | +|---------|-------------| +| `error\|warning` | Find rows with "error" OR "warning" | +| `[0-9]{4}` | Find 4-digit numbers | +| `[a-z]+@[a-z]+\.[a-z]{2,}` | Find email addresses | +| `\d{4}-\d{2}-\d{2}` | Find dates (YYYY-MM-DD) | +| `error.*critical` | Find "error" followed by "critical" | + +## Development + +### Prerequisites + +- Python 3.11+ +- pip + +### Installation + +```bash +python -m venv venv +source venv/bin/activate # Linux/Mac +# or +venv\Scripts\activate # Windows + +pip install -r
excel_filter/requirements.txt +pip install streamlit +``` + +### Run Development Server + +```bash +streamlit run streamlit_app.py +``` + +## License + +See LICENSE file for details. \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..d25f5a1 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,35 @@ +# Excel Filter Tool - Docker Compose Configuration +# For Coolify deployment + +version: '3.8' + +services: + excel-filter: + build: + context: . + dockerfile: Dockerfile + container_name: excel-filter-app + restart: unless-stopped + ports: + - "8501:8501" + environment: + - TZ=Europe/Berlin + volumes: + # Optional: Persist temporary files + - temp_data:/tmp + networks: + - excel-filter-network + healthcheck: + test: ["CMD", "curl", "--fail", "http://localhost:8501/_stcore/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 10s + +volumes: + temp_data: + driver: local + +networks: + excel-filter-network: + driver: bridge \ No newline at end of file diff --git a/excel_filter/requirements.txt b/excel_filter/requirements.txt index c5bf5cb..6d883c4 100644 --- a/excel_filter/requirements.txt +++ b/excel_filter/requirements.txt @@ -3,3 +3,4 @@ pandas>=2.0.3 python-docx>=1.1.0 pytest>=8.0.0 psutil>=5.8.0 +streamlit>=1.28.0 diff --git a/run_streamlit.bat b/run_streamlit.bat new file mode 100644 index 0000000..8af2ab3 --- /dev/null +++ b/run_streamlit.bat @@ -0,0 +1,55 @@ +@echo off +REM Excel Filter Tool - Streamlit App Runner +REM This script starts the Streamlit web application + +echo ======================================== +echo Excel Filter Tool - Web Application +echo ======================================== +echo. 
#!/bin/bash

# Excel Filter Tool - Streamlit App Runner
# Creates ./venv if it is missing, activates it, installs the dependencies
# and starts the Streamlit web application. Must be run from the project
# root (the directory that contains streamlit_app.py).

echo "========================================"
echo "  Excel Filter Tool - Web Application"
echo "========================================"
echo ""

# Locate a Python interpreter. `python` is tried first to keep the previous
# behaviour; `python3` is the fallback for systems that ship no bare `python`.
if command -v python &> /dev/null; then
    PYTHON_BIN=python
elif command -v python3 &> /dev/null; then
    PYTHON_BIN=python3
else
    echo "Error: Python is not installed or not in PATH"
    exit 1
fi

# Check if we're in the correct directory
if [ ! -f "streamlit_app.py" ]; then
    echo "Error: streamlit_app.py not found"
    echo "Please run this script from the project root directory"
    exit 1
fi

# Create virtual environment if it doesn't exist
if [ ! -d "venv" ]; then
    echo "Creating virtual environment..."
    if ! "$PYTHON_BIN" -m venv venv; then
        echo "Error: could not create the virtual environment"
        exit 1
    fi
fi

# Activate virtual environment (POSIX layout first, then the Windows layout
# used by Git Bash). Abort instead of silently installing into the system
# interpreter when neither activation script exists.
echo "Activating virtual environment..."
if [ -f "venv/bin/activate" ]; then
    source venv/bin/activate
elif [ -f "venv/Scripts/activate" ]; then
    source venv/Scripts/activate
else
    echo "Error: could not find an activation script inside venv/"
    exit 1
fi

# Install/upgrade pip
echo "Upgrading pip..."
python -m pip install --upgrade pip -q

# Install requirements
echo "Installing dependencies..."
if ! pip install -r excel_filter/requirements.txt -q; then
    echo "Error: installing dependencies failed"
    deactivate
    exit 1
fi
pip install streamlit -q

echo ""
echo "Starting Streamlit server..."
# The server is started headless, so no browser window is opened for you.
echo "Open http://localhost:8501 in your browser once the server is up."
echo "Press Ctrl+C to stop the server."
echo ""

# Run Streamlit app
streamlit run streamlit_app.py --server.headless=true
status=$?

# Deactivate virtual environment on exit and propagate Streamlit's status
deactivate
exit "$status"
Achtung, gilt nicht fĂŒr Umlaute!", + "needs_input": False, + "allows_quantifier": True + }, + "Leerzeichen": { + "regex": r"\s", + "desc": "Findet Leerzeichen, Tabulatoren und ZeilenumbrĂŒche.", + "needs_input": False, + "allows_quantifier": True + }, + "Beliebiges einzelnes Zeichen": { + "regex": r".", + "desc": "Findet genau ein beliebiges Zeichen (Buchstabe, Ziffer, Symbol oder Leerzeichen).", + "needs_input": False, + "allows_quantifier": True + }, + "Beliebige Zeichenfolge": { + "regex": r".*", + "desc": "Findet null oder mehr beliebige Zeichen. NĂŒtzlich als breiter Platzhalter.", + "needs_input": False, + "allows_quantifier": False + }, + "ODER (Alternative)": { + "regex": r"|", + "desc": "Funktioniert als logischer ODER-Operator. Das Muster findet entweder den Ausdruck davor oder danach.", + "needs_input": False, + "allows_quantifier": False + }, + "Zeilenanfang": { + "regex": r"^", + "desc": "Verankert den Treffer am Anfang einer Zeile oder Zeichenkette.", + "needs_input": False, + "allows_quantifier": False + }, + "Zeilenende": { + "regex": r"$", + "desc": "Verankert den Treffer am Ende einer Zeile oder Zeichenkette.", + "needs_input": False, + "allows_quantifier": False + } +} + +QUANTIFIERS = { + "Genau 1 (Standard)": "", + "1 oder mehr (+)": "+", + "0 oder mehr (*)": "*", + "Optional: 0 oder 1 (?)": "?" 
def get_pattern_presets() -> Dict[str, str]:
    """Return the built-in regex presets, keyed by their German display label."""
    return {
        "Fehler & Warnungen": r"error|warning|critical|fehler|warnung",
        "Nur Fehler": r"error|fehler",
        "Nur Warnungen": r"warning|warnung",
        "Kritische Fehler": r"critical|kritisch",
        "E-Mail-Adressen": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        "Telefonnummern": r"\+?[0-9\s-]{10,}",
        "Datum (JJJJ-MM-TT)": r"\d{4}-\d{2}-\d{2}",
    }


def init_session_state():
    """Seed ``st.session_state`` with a default for every key not yet present.

    Keys that already exist are left untouched, so calling this repeatedly
    never overwrites values that are already stored.
    """
    defaults = {
        "df": None,
        "sheets": [],
        "selected_sheet": None,
        "columns": [],
        "filtered_df": None,
        "stats": None,
        "regex_enabled": True,
        "numeric_enabled": False,
        "column_selection_enabled": False,
        "selected_columns": [],
        "regex_pattern": "",
        "regex_test_text": "",
        "regex_blocks": [],
        "temp_block_val": "",
        "temp_quantifier": "Genau 1 (Standard)",
    }
    for key, val in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = val


def load_excel_file(uploaded_file) -> tuple:
    """Read the first worksheet of an uploaded Excel workbook.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            workbook bytes.

    Returns:
        ``(df, sheet_names, None)`` on success, or ``(None, [], message)``
        when the workbook cannot be read.
    """
    try:
        # The context manager closes the workbook handle, and the first sheet
        # is parsed from the already-open workbook instead of re-reading it.
        with pd.ExcelFile(BytesIO(uploaded_file.getvalue())) as workbook:
            sheets = workbook.sheet_names
            df = workbook.parse(sheet_name=sheets[0])
        return df, sheets, None
    except Exception as e:  # any read failure is reported to the caller as text
        return None, [], str(e)


# Comparison operators accepted by the numeric filter, mapped to the pandas
# Series method that implements them. "=" is accepted as an alias of "==".
_NUMERIC_COMPARATORS = {
    ">": "gt",
    ">=": "ge",
    "<": "lt",
    "<=": "le",
    "==": "eq",
    "=": "eq",
    "!=": "ne",
}


def _resolve_comparator(operator) -> str:
    """Return the Series method name for *operator* or raise ``ValueError``."""
    method = _NUMERIC_COMPARATORS.get(str(operator).strip())
    if method is None:
        raise ValueError(f"Nicht unterstützter Operator: {operator!r}")
    return method


def _numeric_mask(series, method: str, value):
    """Compare *series* (coerced to numbers, non-numeric -> NaN) with *value*."""
    numeric = pd.to_numeric(series, errors="coerce")
    return getattr(numeric, method)(value)


def _regex_row_mask(frame: pd.DataFrame, regex, columns) -> pd.Series:
    """Return a boolean row mask: True where any listed column matches *regex*.

    Cells are converted with ``str()`` one by one and missing values never
    match. Columns that do not exist in *frame* are ignored.
    """
    mask = pd.Series(False, index=frame.index)
    for col in columns:
        if col not in frame.columns:
            continue
        hits = frame[col].map(
            lambda cell: bool(pd.notna(cell)) and regex.search(str(cell)) is not None
        )
        mask = mask | hits.astype(bool)
    return mask


def apply_filters(df: pd.DataFrame, pattern: Optional[str] = None, regex_column: Optional[str] = None, numeric_filter: Optional[Dict[str, Any]] = None, selected_columns: Optional[List[str]] = None) -> tuple:
    """Apply the regex, numeric and column-selection filters to *df*.

    Args:
        df: Input data; it is copied, never modified.
        pattern: Case-insensitive regex. Empty or whitespace-only disables
            the regex filter.
        regex_column: Column to search, or ``"Alle Spalten"`` / ``None`` to
            search every column.
        numeric_filter: Mapping with ``"column"`` (a column name or
            ``"Alle Spalten"``), ``"operator"`` (one of ``>``, ``>=``, ``<``,
            ``<=``, ``==``, ``=``, ``!=``) and ``"value"``.
        selected_columns: Columns to keep in the output; names that do not
            exist are ignored, and if none exist all columns are kept.

    Returns:
        ``(filtered_df, stats, None)`` on success, or
        ``(None, None, message)`` for an invalid regex or a failing numeric
        filter.
    """
    start_time = time.time()
    input_rows = len(df)
    input_columns = len(df.columns)
    filtered_df = df.copy()
    filters_applied = []

    if pattern and pattern.strip():
        try:
            regex = re.compile(pattern, re.IGNORECASE)
        except re.error as e:
            return None, None, f"Ungültiges Regex-Muster: {e}"
        if regex_column and regex_column != "Alle Spalten":
            columns_to_search = [regex_column]
        else:
            columns_to_search = df.columns.tolist()
        # Column-wise matching: a row-wise apply would upcast all-numeric rows
        # to float, so the integer 5 would be searched as "5.0".
        filtered_df = filtered_df[_regex_row_mask(filtered_df, regex, columns_to_search)]
        filters_applied.append("Regex")

    if numeric_filter and numeric_filter.get("column"):
        try:
            column = numeric_filter["column"]
            # The operator is looked up in a fixed table rather than being
            # evaluated as Python source, so it can never execute code.
            method = _resolve_comparator(numeric_filter["operator"])
            value = numeric_filter["value"]

            if column == "Alle Spalten":
                combined_mask = pd.Series(False, index=filtered_df.index)
                for col in filtered_df.columns:
                    combined_mask = combined_mask | _numeric_mask(filtered_df[col], method, value)
                filtered_df = filtered_df[combined_mask]
            else:
                filtered_df = filtered_df[_numeric_mask(filtered_df[column], method, value)]
            filters_applied.append("Numerisch")
        except Exception as e:  # reported to the caller instead of raised
            return None, None, f"Fehler beim Anwenden des numerischen Filters: {e}"

    if selected_columns:
        available_columns = [col for col in selected_columns if col in filtered_df.columns]
        if available_columns:
            filtered_df = filtered_df[available_columns]

    end_time = time.time()
    output_rows = len(filtered_df)
    stats = {
        "input_rows": input_rows,
        "input_columns": input_columns,
        "output_rows": output_rows,
        "output_columns": len(filtered_df.columns),
        "rows_removed": input_rows - output_rows,
        "processing_time": end_time - start_time,
        "filters_applied": filters_applied,
        "retention_rate": (output_rows / input_rows * 100) if input_rows > 0 else 0,
    }
    return filtered_df, stats, None


# (marker, phrase) pairs checked in order against a block's "key".
_BLOCK_PHRASES = (
    ("Ziffer", "eine Ziffer (0-9)"),
    ("Buchstabe", "einen Buchstaben (A-Z oder a-z)"),
    ("Leerzeichen", "ein Leerzeichen"),
    ("Beliebiges einzelnes Zeichen", "ein beliebiges einzelnes Zeichen"),
    ("Beliebige Zeichenfolge", "eine beliebige Zeichenfolge"),
    ("ODER", "ODER"),
    ("Zeilenanfang", "den Anfang der Zeichenkette"),
    ("Zeilenende", "das Ende der Zeichenkette"),
)

# Phrases that are never combined with a quantifier.
_UNQUANTIFIED_PHRASES = frozenset({
    "ODER",
    "den Anfang der Zeichenkette",
    "das Ende der Zeichenkette",
    "eine beliebige Zeichenfolge",
})

_LEADING_ARTICLES = ("eine ", "einen ", "ein ", "den ", "das ")


def _describe_block(b_type: str, val) -> str:
    """Return the German phrase for one block, without any quantifier."""
    if "Exakter Text" in b_type:
        return f"den exakten Text '{val}'"
    for marker, phrase in _BLOCK_PHRASES:
        if marker in b_type:
            return phrase
    return "ein Element"


def _apply_quantifier(noun: str, q_key: str) -> str:
    """Reword *noun* according to the quantifier label *q_key*."""
    bare = noun
    had_article = False
    # Only the LEADING article is removed. Replacing every occurrence would
    # also eat "ein " / "den " ... inside the user's own exact text.
    for article in _LEADING_ARTICLES:
        if noun.startswith(article):
            bare = noun[len(article):]
            had_article = True
            break

    # The appended "en" is the existing crude pluralisation; it is kept so the
    # wording for the built-in blocks does not change.
    if "1 oder mehr" in q_key:
        return f"eine oder mehr {bare}en" if had_article else f"ein oder mehr {noun}"
    if "0 oder mehr" in q_key:
        return f"null oder mehr {bare}en" if had_article else f"null oder mehr {noun}"
    if "Optional" in q_key:
        return f"ein optionales {bare}"
    return noun


def explain_regex_german(blocks: List[Dict]) -> str:
    """Translate the regex building blocks into one German sentence.

    Args:
        blocks: Dicts with a ``"key"`` (block name) and optionally
            ``"value"`` (text for "Exakter Text") and ``"quantifier_key"``.

    Returns:
        The capitalised sentence ending with a full stop, or
        ``"Muster ist leer."`` when *blocks* is empty.
    """
    if not blocks:
        return "Muster ist leer."

    explanations = []
    for block in blocks:
        noun = _describe_block(block["key"], block.get("value", ""))
        q_key = block.get("quantifier_key", "Genau 1 (Standard)")
        if noun not in _UNQUANTIFIED_PHRASES:
            noun = _apply_quantifier(noun, q_key)
        explanations.append(noun)

    # Parts around "ODER" are joined by a single space (no doubled or trailing
    # blanks); all other parts are chained with ", gefolgt von ".
    sentence = explanations[0]
    for previous, current in zip(explanations, explanations[1:]):
        separator = " " if "ODER" in (previous, current) else ", gefolgt von "
        sentence += separator + current

    return sentence[:1].upper() + sentence[1:] + "."
+ +def apply_sleek_dark_theme(): + st.markdown(""" + + """, unsafe_allow_html=True) + +def render_pipeline_tab(): + st.markdown('
{block['value']}" if block['value'] else ""
+ q_key = block.get('quantifier_key', 'Genau 1 (Standard)')
+ q_display = f" [ {q_key} ]" if q_key != "Genau 1 (Standard)" else ""
+
+ st.markdown(f'