# Excel Filter Tool - Streamlit Docker Image
# Optimized for Coolify deployment

FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY excel_filter/requirements.txt ./requirements.txt

# Install Python dependencies
RUN pip install --upgrade pip && \
    pip install -r requirements.txt && \
    pip install streamlit

# Copy application code
COPY excel_filter/ ./excel_filter/
COPY streamlit_app.py .
COPY .streamlit/ .streamlit/

# Create a non-root user for security
RUN useradd --create-home --shell /bin/bash appuser && \
    chown -R appuser:appuser /app
USER appuser

# Expose Streamlit port
EXPOSE 8501

# Health check.
# The slim base image ships no curl and none is installed above, so a
# curl-based probe would always fail and mark the container unhealthy.
# Probe with the Python interpreter that is guaranteed to be present;
# urlopen raises (non-zero exit) on connection errors and non-2xx replies.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8501/_stcore/health', timeout=5)" || exit 1

# Run Streamlit
ENTRYPOINT ["streamlit", "run", "streamlit_app.py"]
Push this repository to your Git server +2. In Coolify, create a new resource and select "Docker Compose" +3. Point to your repository +4. Deploy! + +### Option 2: Dockerfile + +1. In Coolify, create a new resource and select "Dockerfile" +2. Point to your repository +3. Set the port to `8501` +4. Deploy! + +### Environment Variables (Optional) + +No environment variables are required, but you can set: + +- `TZ` - Timezone (default: UTC) + +## Usage + +1. **Upload**: Drag and drop an Excel file or click to browse +2. **Select Sheet**: Choose the worksheet to filter +3. **Configure Filters**: + - **Regex Tab**: Enable regex filtering and enter a pattern + - **Numeric Tab**: Set up numeric comparisons + - **Columns Tab**: Select which columns to include +4. **Apply**: Click "Apply Filters" to process +5. **Download**: Click "Download Filtered File" to get results + +## File Structure + +``` +├── streamlit_app.py # Main Streamlit application +├── Dockerfile # Docker configuration +├── docker-compose.yml # Docker Compose configuration +├── .dockerignore # Docker ignore file +├── .streamlit/ +│ └── config.toml # Streamlit configuration +├── run_streamlit.bat # Windows runner script +├── run_streamlit.sh # Linux/Mac runner script +└── excel_filter/ # Core filter module + ├── filter.py # Main filter logic + ├── requirements.txt # Python dependencies + └── locales/ # Translations + ├── de.json + └── en.json +``` + +## Regex Examples + +| Pattern | Description | +|---------|-------------| +| `error\|warning` | Find rows with "error" OR "warning" | +| `[0-9]{4}` | Find 4-digit numbers | +| `[a-z]+@[a-z]+\.[a-z]{2,}` | Find email addresses | +| `\d{4}-\d{2}-\d{2}` | Find dates (YYYY-MM-DD) | +| `error.*critical` | Find "error" followed by "critical" | + +## Development + +### Prerequisites + +- Python 3.11+ +- pip + +### Installation + +```bash +python -m venv venv +source venv/bin/activate # Linux/Mac +# or +venv\Scripts\activate # Windows + +pip install -r 
# Excel Filter Tool - Docker Compose Configuration
# For Coolify deployment

version: '3.8'

services:
  excel-filter:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: excel-filter-app
    restart: unless-stopped
    ports:
      - "8501:8501"
    environment:
      - TZ=Europe/Berlin
    volumes:
      # Optional: Persist temporary files
      - temp_data:/tmp
    networks:
      - excel-filter-network
    healthcheck:
      # This check overrides the image's HEALTHCHECK. The python:3.11-slim
      # based image contains no curl, so probe the Streamlit health endpoint
      # with the bundled Python interpreter instead; urlopen raises (and the
      # process exits non-zero) on connection errors and non-2xx responses.
      test:
        - "CMD"
        - "python"
        - "-c"
        - "import urllib.request; urllib.request.urlopen('http://localhost:8501/_stcore/health', timeout=5)"
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

volumes:
  temp_data:
    driver: local

networks:
  excel-filter-network:
    driver: bridge
#!/bin/bash

# Excel Filter Tool - Streamlit App Runner
# Creates (if needed) and activates a local virtual environment, installs the
# dependencies and starts the Streamlit web application.
# Must be run from the project root (the directory with streamlit_app.py).

echo "========================================"
echo " Excel Filter Tool - Web Application"
echo "========================================"
echo ""

# Locate a Python interpreter. Prefer python3: many Linux/macOS systems do
# not provide a plain `python` executable.
if command -v python3 &> /dev/null; then
    PYTHON=python3
elif command -v python &> /dev/null; then
    PYTHON=python
else
    echo "Error: Python is not installed or not in PATH"
    exit 1
fi

# Check if we're in the correct directory
if [ ! -f "streamlit_app.py" ]; then
    echo "Error: streamlit_app.py not found"
    echo "Please run this script from the project root directory"
    exit 1
fi

# Create virtual environment if it doesn't exist
if [ ! -d "venv" ]; then
    echo "Creating virtual environment..."
    if ! "$PYTHON" -m venv venv; then
        echo "Error: could not create the virtual environment"
        exit 1
    fi
fi

# Activate virtual environment (venv/bin on Linux/macOS, venv/Scripts when
# run from Git Bash on Windows). Abort if neither works: continuing would
# install the packages into the system interpreter.
echo "Activating virtual environment..."
if ! source venv/bin/activate 2>/dev/null && ! source venv/Scripts/activate 2>/dev/null; then
    echo "Error: could not activate the virtual environment"
    exit 1
fi

# Install/upgrade pip (inside the venv `python` always exists)
echo "Upgrading pip..."
python -m pip install --upgrade pip -q

# Install requirements
echo "Installing dependencies..."
if ! python -m pip install -r excel_filter/requirements.txt -q; then
    echo "Error: installing dependencies failed"
    deactivate
    exit 1
fi
python -m pip install streamlit -q

echo ""
echo "Starting Streamlit server..."
# The server runs headless, so no browser window is opened automatically.
echo "Open http://localhost:8501 in your browser."
echo "Press Ctrl+C to stop the server."
echo ""

# Run Streamlit app and remember its exit status
streamlit run streamlit_app.py --server.headless=true
exit_code=$?

# Deactivate virtual environment on exit
deactivate
exit $exit_code
Achtung, gilt nicht fĂŒr Umlaute!", + "needs_input": False, + "allows_quantifier": True + }, + "Leerzeichen": { + "regex": r"\s", + "desc": "Findet Leerzeichen, Tabulatoren und ZeilenumbrĂŒche.", + "needs_input": False, + "allows_quantifier": True + }, + "Beliebiges einzelnes Zeichen": { + "regex": r".", + "desc": "Findet genau ein beliebiges Zeichen (Buchstabe, Ziffer, Symbol oder Leerzeichen).", + "needs_input": False, + "allows_quantifier": True + }, + "Beliebige Zeichenfolge": { + "regex": r".*", + "desc": "Findet null oder mehr beliebige Zeichen. NĂŒtzlich als breiter Platzhalter.", + "needs_input": False, + "allows_quantifier": False + }, + "ODER (Alternative)": { + "regex": r"|", + "desc": "Funktioniert als logischer ODER-Operator. Das Muster findet entweder den Ausdruck davor oder danach.", + "needs_input": False, + "allows_quantifier": False + }, + "Zeilenanfang": { + "regex": r"^", + "desc": "Verankert den Treffer am Anfang einer Zeile oder Zeichenkette.", + "needs_input": False, + "allows_quantifier": False + }, + "Zeilenende": { + "regex": r"$", + "desc": "Verankert den Treffer am Ende einer Zeile oder Zeichenkette.", + "needs_input": False, + "allows_quantifier": False + } +} + +QUANTIFIERS = { + "Genau 1 (Standard)": "", + "1 oder mehr (+)": "+", + "0 oder mehr (*)": "*", + "Optional: 0 oder 1 (?)": "?" 
# Sentinel used by the UI for "search / compare across every column".
_ALL_COLUMNS = "Alle Spalten"

# Quantifier label that means "no quantifier".
_DEFAULT_QUANTIFIER = "Genau 1 (Standard)"

# Supported comparison symbols mapped to the pandas Series method that
# implements them. "=" is what the UI offers for equality.
_NUMERIC_COMPARATORS = {
    ">": "gt",
    "<": "lt",
    ">=": "ge",
    "<=": "le",
    "=": "eq",
    "==": "eq",
}

# German phrases per brick type, looked up by substring of the brick key in
# this order. Each entry holds the forms
# (exactly one, one or more, zero or more, optional); bricks that cannot be
# quantified only carry the first form.
_BRICK_PHRASES = (
    ("Exakter Text", (
        "den exakten Text '{val}'",
        "den exakten Text '{val}' (einmal oder mehrmals)",
        "den exakten Text '{val}' (keinmal oder mehrmals)",
        "den optionalen exakten Text '{val}'",
    )),
    ("Ziffer", (
        "eine Ziffer (0-9)",
        "eine oder mehr Ziffern (0-9)",
        "null oder mehr Ziffern (0-9)",
        "eine optionale Ziffer (0-9)",
    )),
    ("Buchstabe", (
        "einen Buchstaben (A-Z oder a-z)",
        "einen oder mehr Buchstaben (A-Z oder a-z)",
        "null oder mehr Buchstaben (A-Z oder a-z)",
        "einen optionalen Buchstaben (A-Z oder a-z)",
    )),
    ("Leerzeichen", (
        "ein Leerzeichen",
        "ein oder mehr Leerzeichen",
        "null oder mehr Leerzeichen",
        "ein optionales Leerzeichen",
    )),
    ("Beliebiges einzelnes Zeichen", (
        "ein beliebiges einzelnes Zeichen",
        "ein oder mehr beliebige Zeichen",
        "null oder mehr beliebige Zeichen",
        "ein optionales beliebiges Zeichen",
    )),
    ("Beliebige Zeichenfolge", ("eine beliebige Zeichenfolge",)),
    ("ODER", ("ODER",)),
    ("Zeilenanfang", ("den Anfang der Zeichenkette",)),
    ("Zeilenende", ("das Ende der Zeichenkette",)),
)

# Forms used for a brick key that matches none of the markers above.
_FALLBACK_PHRASES = (
    "ein Element",
    "ein oder mehr Elemente",
    "null oder mehr Elemente",
    "ein optionales Element",
)


def get_pattern_presets() -> Dict[str, str]:
    """Return the built-in regex presets, keyed by their German display name."""
    return {
        "Fehler & Warnungen": r"error|warning|critical|fehler|warnung",
        "Nur Fehler": r"error|fehler",
        "Nur Warnungen": r"warning|warnung",
        "Kritische Fehler": r"critical|kritisch",
        "E-Mail-Adressen": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
        "Telefonnummern": r"\+?[0-9\s-]{10,}",
        "Datum (JJJJ-MM-TT)": r"\d{4}-\d{2}-\d{2}",
    }


def init_session_state():
    """Seed ``st.session_state`` with a default for every key that is not set yet.

    Existing values are left untouched, so calling this on every rerun is safe.
    """
    # Built fresh on each call so the list defaults are never shared.
    defaults = {
        "df": None,
        "sheets": [],
        "selected_sheet": None,
        "columns": [],
        "filtered_df": None,
        "stats": None,
        "regex_enabled": True,
        "numeric_enabled": False,
        "column_selection_enabled": False,
        "selected_columns": [],
        "regex_pattern": "",
        "regex_test_text": "",
        "regex_blocks": [],
        "temp_block_val": "",
        "temp_quantifier": "Genau 1 (Standard)",
    }
    for key, val in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = val


def load_excel_file(uploaded_file) -> tuple:
    """Read the first worksheet of an uploaded Excel workbook.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            workbook bytes.

    Returns:
        ``(df, sheets, None)`` on success, where ``sheets`` lists all sheet
        names, or ``(None, [], message)`` when the workbook cannot be read.
    """
    try:
        # The context manager closes the workbook handle; parsing through the
        # already opened ExcelFile avoids reading the bytes a second time.
        with pd.ExcelFile(BytesIO(uploaded_file.getvalue())) as workbook:
            sheets = workbook.sheet_names
            df = workbook.parse(sheet_name=sheets[0])
        return df, sheets, None
    except Exception as e:
        # Deliberately broad: this is the UI boundary and the Excel engines
        # raise many unrelated exception types for broken uploads.
        return None, [], str(e)


def _regex_row_mask(frame: pd.DataFrame, regex, column_names) -> pd.Series:
    """Return a boolean mask of rows with a non-null cell matching ``regex``.

    Only columns whose label is in ``column_names`` are searched. Columns are
    visited by position so duplicate labels are handled, and each cell is
    stringified from its own column dtype (a row-wise ``apply`` would upcast
    mixed int/float rows and turn ``5`` into ``"5.0"``).
    """
    wanted = set(column_names)

    def _cell_matches(cell) -> bool:
        return bool(pd.notna(cell)) and regex.search(str(cell)) is not None

    mask = pd.Series(False, index=frame.index)
    for position, label in enumerate(frame.columns):
        if label in wanted:
            mask |= frame.iloc[:, position].map(_cell_matches).astype(bool)
    return mask


def _numeric_row_mask(frame: pd.DataFrame, column, comparator: str, value) -> pd.Series:
    """Return a boolean mask of rows whose numeric cell satisfies the comparison.

    ``comparator`` is a pandas Series method name (``gt``, ``lt``, ...). Cells
    that cannot be converted to a number become NaN and never match. With the
    "all columns" sentinel a row matches when any of its columns matches.
    """
    def _compare(series: pd.Series) -> pd.Series:
        numbers = pd.to_numeric(series, errors="coerce")
        return getattr(numbers, comparator)(value)

    if column != _ALL_COLUMNS:
        return _compare(frame[column])

    mask = pd.Series(False, index=frame.index)
    for position in range(len(frame.columns)):
        mask |= _compare(frame.iloc[:, position])
    return mask


def apply_filters(df: pd.DataFrame, pattern: Optional[str] = None, regex_column: Optional[str] = None, numeric_filter: Optional[Dict[str, Any]] = None, selected_columns: Optional[List[str]] = None) -> tuple:
    """Apply the regex, numeric and column filters to ``df`` in that order.

    Args:
        df: Input data; it is never modified.
        pattern: Case-insensitive regex. Blank or ``None`` disables the filter.
        regex_column: Column to search, or "Alle Spalten"/``None`` for all.
        numeric_filter: Dict with ``column``, ``operator`` (one of
            ``>``, ``<``, ``>=``, ``<=``, ``=``) and ``value``.
        selected_columns: Columns to keep in the output; unknown names are
            ignored and an empty selection keeps every column.

    Returns:
        ``(filtered_df, stats, None)`` on success or ``(None, None, message)``
        when a filter is invalid.
    """
    started = time.perf_counter()
    input_rows = len(df)
    input_columns = len(df.columns)
    filtered_df = df.copy()
    filters_applied = []

    if pattern and pattern.strip():
        try:
            regex = re.compile(pattern, re.IGNORECASE)
        except re.error as e:
            return None, None, f"UngĂŒltiges Regex-Muster: {e}"
        if regex_column and regex_column != _ALL_COLUMNS:
            columns_to_search = [regex_column]
        else:
            columns_to_search = df.columns.tolist()
        filtered_df = filtered_df[_regex_row_mask(filtered_df, regex, columns_to_search)]
        filters_applied.append("Regex")

    if numeric_filter and numeric_filter.get("column"):
        try:
            column = numeric_filter["column"]
            operator = numeric_filter["operator"]
            value = numeric_filter["value"]
            # Look the operator up in a fixed table instead of evaluating a
            # generated expression: "=" is not valid expression syntax, and
            # arbitrary operator text must never be executed.
            comparator = _NUMERIC_COMPARATORS.get(str(operator).strip())
            if comparator is None:
                return None, None, f"Fehler beim Anwenden des numerischen Filters: Unbekannter Vergleichsoperator '{operator}'"
            filtered_df = filtered_df[_numeric_row_mask(filtered_df, column, comparator, value)]
            filters_applied.append("Numerisch")
        except (KeyError, TypeError, ValueError) as e:
            return None, None, f"Fehler beim Anwenden des numerischen Filters: {e}"

    if selected_columns:
        available_columns = [col for col in selected_columns if col in filtered_df.columns]
        if available_columns:
            filtered_df = filtered_df[available_columns]

    output_rows = len(filtered_df)
    stats = {
        "input_rows": input_rows,
        "input_columns": input_columns,
        "output_rows": output_rows,
        "output_columns": len(filtered_df.columns),
        "rows_removed": input_rows - output_rows,
        "processing_time": time.perf_counter() - started,
        "filters_applied": filters_applied,
        "retention_rate": (output_rows / input_rows * 100) if input_rows > 0 else 0,
    }
    return filtered_df, stats, None


def _brick_phrase(block: Dict) -> str:
    """Return the German phrase for one builder block, including its quantifier."""
    brick_key = block["key"]
    literal = block.get("value", "")
    quantifier_key = block.get("quantifier_key", _DEFAULT_QUANTIFIER)

    forms = _FALLBACK_PHRASES
    for marker, candidate in _BRICK_PHRASES:
        if marker in brick_key:
            forms = candidate
            break

    if "1 oder mehr" in quantifier_key:
        form_index = 1
    elif "0 oder mehr" in quantifier_key:
        form_index = 2
    elif "Optional" in quantifier_key:
        form_index = 3
    else:
        form_index = 0
    # Anchors, "ODER" and the wildcard sequence have a single form only.
    if form_index >= len(forms):
        form_index = 0

    # The user's text is inserted verbatim; it is never rewritten.
    return forms[form_index].format(val=literal)


def explain_regex_german(blocks: List[Dict]) -> str:
    """Translate the regex builder blocks into one German sentence.

    Consecutive blocks are joined with ", gefolgt von "; an "ODER" block is
    joined to its neighbours with single spaces. The sentence starts with a
    capital letter and ends with a period.
    """
    if not blocks:
        return "Muster ist leer."

    phrases = [_brick_phrase(block) for block in blocks]

    pieces = [phrases[0]]
    for previous, current in zip(phrases, phrases[1:]):
        separator = " " if "ODER" in (previous, current) else ", gefolgt von "
        pieces.append(separator + current)

    sentence = "".join(pieces)
    return sentence[:1].upper() + sentence[1:] + "."
+ +def apply_sleek_dark_theme(): + st.markdown(""" + + """, unsafe_allow_html=True) + +def render_pipeline_tab(): + st.markdown('
Schritt 1: Datei auswÀhlen
', unsafe_allow_html=True) + uploaded_file = st.file_uploader("Lade eine Excel-Datei hoch (.xlsx, .xls)", type=["xlsx", "xls"], label_visibility="collapsed") + + if uploaded_file: + if uploaded_file != st.session_state.get("last_uploaded"): + st.session_state.last_uploaded = uploaded_file + with st.spinner("Lade..."): + df, sheets, error = load_excel_file(uploaded_file) + if error: + st.error(f"Fehler: {error}") + else: + st.session_state.df = df + st.session_state.sheets = sheets + st.session_state.selected_sheet = sheets[0] + st.session_state.columns = df.columns.tolist() + st.session_state.filtered_df = None + st.session_state.stats = None + st.session_state.selected_columns = list(df.columns) + + st.markdown(f'
Datei geladen: {uploaded_file.name}
', unsafe_allow_html=True) + col1, _ = st.columns([1, 2]) + with col1: + current_idx = st.session_state.sheets.index(st.session_state.selected_sheet) if st.session_state.selected_sheet in st.session_state.sheets else 0 + selected_sheet = st.selectbox("Arbeitsblatt auswÀhlen", st.session_state.sheets, index=current_idx) + if selected_sheet != st.session_state.selected_sheet: + st.session_state.selected_sheet = selected_sheet + st.session_state.df = pd.read_excel(BytesIO(st.session_state.last_uploaded.getvalue()), sheet_name=selected_sheet) + st.session_state.columns = st.session_state.df.columns.tolist() + st.session_state.selected_columns = list(st.session_state.df.columns) + st.rerun() + + if st.session_state.df is not None: + st.markdown('
Schritt 2: Filter konfigurieren
', unsafe_allow_html=True) + filter_tabs = st.tabs(["Regex-Filter", "Numerischer Filter", "Spaltenauswahl"]) + + with filter_tabs[0]: + st.write("") + st.session_state.regex_enabled = st.checkbox("Regex-Filter aktivieren", value=st.session_state.regex_enabled) + if st.session_state.regex_enabled: + col1, col2 = st.columns([2, 1]) + with col1: + presets = get_pattern_presets() + preset_names = ["-- Vorlagen --"] + list(presets.keys()) + def on_preset_change(): + sel = st.session_state.preset_selector + if sel != preset_names[0] and sel in presets: + st.session_state.regex_pattern = presets[sel] + st.selectbox("Vorlage laden", preset_names, key="preset_selector", on_change=on_preset_change) + st.session_state.regex_pattern = st.text_input("Aktives Regex-Muster (Nutze den 'Regex-Builder' zum visuellen Erstellen)", value=st.session_state.regex_pattern, placeholder="z.B. ^Fehler.*") + with col2: + regex_column = st.selectbox("Spalte fĂŒr Regex-Filter", ["Alle Spalten"] + st.session_state.columns) + + with filter_tabs[1]: + st.write("") + st.session_state.numeric_enabled = st.checkbox("Numerischen Filter aktivieren", value=st.session_state.numeric_enabled) + if st.session_state.numeric_enabled: + col1, col2, col3 = st.columns([2, 2, 1]) + with col1: + numeric_column = st.selectbox("Spalte", ["Alle Spalten"] + st.session_state.columns) + with col2: + ops = {">": ">", "<": "<", ">=": ">=", "<=": "<=", "=": "="} + selected_op = st.selectbox("Vergleichsoperator", list(ops.values())) + numeric_operator = [k for k, v in ops.items() if v == selected_op][0] + with col3: + numeric_value = st.text_input("Wert") + + with filter_tabs[2]: + st.write("") + st.session_state.column_selection_enabled = st.checkbox("Spaltenauswahl aktivieren", value=st.session_state.column_selection_enabled) + if st.session_state.column_selection_enabled: + col_btn1, col_btn2, _ = st.columns([1, 1, 3]) + with col_btn1: + if st.button("Alle auswĂ€hlen", use_container_width=True): + 
st.session_state.selected_columns = list(st.session_state.columns) + st.rerun() + with col_btn2: + if st.button("Alle abwĂ€hlen", use_container_width=True): + st.session_state.selected_columns = [] + st.rerun() + st.session_state.selected_columns = st.multiselect("Spalten auswĂ€hlen", st.session_state.columns, default=st.session_state.get("selected_columns", st.session_state.columns)) + + st.markdown('
Schritt 3: Ergebnisse & Export
', unsafe_allow_html=True) + if st.button("Filter anwenden", type="primary"): + val_error = None + if st.session_state.regex_enabled and not st.session_state.regex_pattern: val_error = "Bitte gib ein Regex-Muster ein" + if st.session_state.numeric_enabled and not numeric_value: val_error = "Bitte gib einen numerischen Wert ein" + + if val_error: + st.error(val_error) + else: + num_dict = {"column": numeric_column, "operator": numeric_operator, "value": float(numeric_value)} if (st.session_state.numeric_enabled and numeric_value) else None + cols = st.session_state.selected_columns if st.session_state.column_selection_enabled else None + with st.spinner("Verarbeite..."): + filtered_df, stats, err = apply_filters(st.session_state.df, pattern=st.session_state.regex_pattern if st.session_state.regex_enabled else None, regex_column=regex_column if st.session_state.regex_enabled else None, numeric_filter=num_dict, selected_columns=cols) + if err: + st.error(f"Fehler: {err}") + else: + st.session_state.filtered_df = filtered_df + st.session_state.stats = stats + st.success("Filter erfolgreich angewendet.") + + if st.session_state.filtered_df is not None and st.session_state.stats is not None: + st.divider() + sc1, sc2, sc3, sc4 = st.columns(4) + sc1.metric("Eingabezeilen", f"{st.session_state.stats['input_rows']:,}") + sc2.metric("Ausgabezeilen", f"{st.session_state.stats['output_rows']:,}") + sc3.metric("Entfernte Zeilen", f"{st.session_state.stats['rows_removed']:,}") + sc4.metric("Trefferquote", f"{st.session_state.stats['retention_rate']:.1f}%") + + st.write("") + st.dataframe(st.session_state.filtered_df.head(100), use_container_width=True, height=300) + st.caption(f"Zeige {min(100, len(st.session_state.filtered_df))} von {len(st.session_state.filtered_df)} Zeilen") + + out_buf = BytesIO() + with pd.ExcelWriter(out_buf, engine='openpyxl') as writer: + st.session_state.filtered_df.to_excel(writer, index=False) + out_buf.seek(0) + st.download_button("Gefilterte Datei 
herunterladen", out_buf, "gefilterte_ausgabe.xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", type="primary") + +def render_regex_builder_tab(): + st.markdown('
Visueller Regex-Builder
', unsafe_allow_html=True) + st.markdown("Erstelle Suchmuster durch Kombinieren von modularen Bausteinen. Das Muster wird von oben nach unten aufgebaut.") + st.write("") + + col_build, col_test = st.columns([1.2, 1]) + + with col_build: + st.markdown("### 1. Bausteine hinzufĂŒgen") + + selected_block_key = st.selectbox("WĂ€hle einen Bausteintyp:", list(REGEX_BRICKS.keys())) + block_data = REGEX_BRICKS[selected_block_key] + + st.markdown(f'
Beschreibung: {block_data["desc"]}
', unsafe_allow_html=True) + + col1, col2 = st.columns(2) + with col1: + if block_data["needs_input"]: + st.text_input("Text zum Suchen:", placeholder="z.B. Fehler", key="temp_block_val") + else: + st.session_state.temp_block_val = "" + with col2: + if block_data["allows_quantifier"]: + st.selectbox("Wie oft:", list(QUANTIFIERS.keys()), key="temp_quantifier") + else: + st.session_state.temp_quantifier = "Genau 1 (Standard)" + + def add_block_callback(): + st.session_state.regex_blocks.append({ + "key": selected_block_key, + "value": st.session_state.temp_block_val if block_data["needs_input"] else "", + "regex_format": block_data["regex"], + "quantifier_key": st.session_state.temp_quantifier if block_data["allows_quantifier"] else "Genau 1 (Standard)" + }) + st.session_state.temp_block_val = "" + st.session_state.temp_quantifier = "Genau 1 (Standard)" + + st.button("Baustein hinzufĂŒgen", use_container_width=True, on_click=add_block_callback) + + st.write("---") + st.markdown("### 2. Bausteine als Reihe") + if not st.session_state.regex_blocks: + st.info("Die Bausteinreihe ist leer. WĂ€hle oben einen Baustein und klicke auf HinzufĂŒgen.") + else: + for i, block in enumerate(st.session_state.regex_blocks): + c1, c2 = st.columns([5, 1]) + with c1: + val_display = f" {block['value']}" if block['value'] else "" + q_key = block.get('quantifier_key', 'Genau 1 (Standard)') + q_display = f"   [ {q_key} ]" if q_key != "Genau 1 (Standard)" else "" + + st.markdown(f'
{block["key"]}{val_display}{q_display}
', unsafe_allow_html=True) + with c2: + if st.button("Entfernen", key=f"del_block_{i}", help="Diesen Baustein entfernen"): + st.session_state.regex_blocks.pop(i) + st.rerun() + + # Kompiliere die visuellen Bausteine zu echtem Regex-Code + regex_parts = [] + for block in st.session_state.regex_blocks: + q_val = QUANTIFIERS[block.get("quantifier_key", "Genau 1 (Standard)")] + if block["value"]: + safe_val = re.escape(block["value"]) + if q_val: + regex_parts.append(f"(?:{safe_val}){q_val}") + else: + regex_parts.append(safe_val) + else: + regex_parts.append(f"{block['regex_format']}{q_val}") + + generated_regex = "".join(regex_parts) + + with col_test: + st.markdown("### 3. Muster testen") + + st.markdown("**Muster-ErklÀrung:**") + german_translation = explain_regex_german(st.session_state.regex_blocks) + st.markdown(f'
"{german_translation}"
', unsafe_allow_html=True) + + st.markdown("**Generiertes Regex:**") + st.code(generated_regex if generated_regex else "(leer)", language="regex") + + test_text = st.text_input("Testtext", value=st.session_state.regex_test_text, placeholder="Gib einen Beispieltext ein...") + st.session_state.regex_test_text = test_text + + test_btn = st.button("Muster testen", use_container_width=True) + + if (test_btn or test_text) and generated_regex and test_text: + try: + regex = re.compile(generated_regex, re.IGNORECASE) + matches = regex.findall(test_text) + + if matches: + st.success(f"{len(matches)} Treffer gefunden.") + highlighted = test_text + for match in set(matches): + if match: + highlighted = highlighted.replace(match, f"**{match}**") + st.markdown(f"> {highlighted}") + else: + st.warning("Keine Treffer gefunden. Passe dein Muster oder den Testtext an.") + except re.error: + st.error("UngĂŒltige Mustersyntax. VervollstĂ€ndige die Musterfolge.") + + st.write("") + if st.button("Auf Excel-Pipeline anwenden", type="primary", use_container_width=True): + if generated_regex: + st.session_state.regex_pattern = generated_regex + st.success("Muster angewendet. Wechsle zum Tab 'Excel-Filter' um deine Daten zu filtern.") + else: + st.error("Kann kein leeres Muster anwenden.") + +def render_help_tab(): + st.markdown('
Hilfe
', unsafe_allow_html=True) + + st.markdown(""" + + ### Tab: Excel-Pipeline + + **Schritt 1: Datei auswĂ€hlen** + - Lade Excel-Dateien im Format `.xlsx` oder `.xls` hoch + - WĂ€hle das zu verarbeitende Arbeitsblatt, falls die Datei mehrere enthĂ€lt + + **Schritt 2: Filter konfigurieren** + + *Regex-Filter:* + - Gib ein regulĂ€res Ausdrucksmuster manuell ein oder nutze den visuellen Regex-Builder + - WĂ€hle eine bestimmte Spalte oder durchsuche alle Spalten + - Mustersuche ist standardmĂ€ĂŸig Groß-/Kleinschreibung unabhĂ€ngig + + *Numerischer Filter:* + - Filtere Zeilen basierend auf numerischen Vergleichen (>, <, >=, <=, =) + - Wende auf eine bestimmte Spalte oder alle (numerischen) Spalten an + + *Spaltenauswahl:* + - WĂ€hle, welche Spalten in der Ausgabe enthalten sein sollen + - Nutze "Alle auswĂ€hlen" / "Alle abwĂ€hlen" fĂŒr schnelle Auswahl + + **Schritt 3: Ergebnisse & Export** + - ÜberprĂŒfe die Filterstatistiken (beibehaltene Zeilen, entfernte Zeilen, Trefferquote) + - Zeige die gefilterten Daten an (erste 100 Zeilen) + - Lade den gefilterten Datensatz als neue Excel-Datei herunter + + --- + + ### Tab: Visueller Regex-Builder + + Erstelle regulĂ€re AusdrĂŒcke visuell durch Kombinieren modularer Bausteine: + + **VerfĂŒgbare Bausteine:** + + | Baustein | Beschreibung | Regex-Äquivalent | + |----------|--------------|------------------| + | Exakter Text | Findet wörtlichen Text | `text` | + | Ziffer (0-9) | Findet eine einzelne Ziffer | `\d` | + | Buchstabe (A-Z, a-z) | Findet einen Buchstaben | `[a-zA-Z]` | + | Leerzeichen | Findet Leerzeichen, Tabs, ZeilenumbrĂŒche | `\s` | + | Beliebiges einzelnes Zeichen | Platzhalter fĂŒr ein Zeichen | `.` | + | Beliebige Zeichenfolge | Platzhalter fĂŒr beliebigen Inhalt | `.*` | + | ODER (Alternative) | Findet entweder Ausdruck davor oder danach | `\|` | + | Zeilenanfang | Verankert am Anfang | `^` | + | Zeilenende | Verankert am Ende | `$` | + + **Quantifizierer:** + - **Genau 1:** Findet das Element einmal (Standard) + 
- **1 oder mehr (+):** Findet ein oder mehr Vorkommen + - **0 oder mehr (*):** Findet null oder mehr Vorkommen + - **Optional (?):** Findet null oder ein Vorkommen + + **Beispiel: Fehlercodes finden** + + Um Zeilen zu finden, die mit "Fehler" beginnen, gefolgt von einem Leerzeichen und Ziffern (z.B. "Fehler 404"): + + 1. FĂŒge "Zeilenanfang" hinzu - verankert am Zeilenanfang + 2. FĂŒge "Exakter Text" mit Wert "Fehler" hinzu + 3. FĂŒge "Leerzeichen" mit Quantifizierer "1 oder mehr (+)" hinzu + 4. FĂŒge "Ziffer (0-9)" mit Quantifizierer "1 oder mehr (+)" hinzu + + Generiertes Muster: `^Fehler\s+\d+` + + --- + + ### Tipps fĂŒr effektives Filtern + + 1. **Teste Muster zuerst:** Nutze die Testfunktion des Regex-Builders, bevor du ihn auf den kompletten Datensatz anwendest + 2. **Kombiniere Filter sorgfĂ€ltig:** Mehrere Filter können gleichzeitig aktiviert werden; sie arbeiten nacheinander + 3. **Spaltenauswahl reduziert DateigrĂ¶ĂŸe:** Exportiere nur benötigte Spalten um Ergebnisse zu straffen + 4. **Groß-/Kleinschreibung:** Alle Regex-Muster ignorieren standardmĂ€ĂŸig Groß-/Kleinschreibung + 5. 
**Sonderzeichen in exaktem Text:** Der Builder maskiert spezielle Regex-Zeichen automatisch + + --- + + + """) + +def main(): + init_session_state() + apply_sleek_dark_theme() + + with st.sidebar: + st.markdown("### DatenĂŒbersicht") + + if st.session_state.df is not None: + # Dateiinformationen + st.markdown("**Infos**") + st.caption(f"Aktives Blatt: {st.session_state.selected_sheet}") + + st.divider() + + # Zeilenstatistik + st.markdown("**Statistik**") + st.metric("Alle Zeilen", f"{len(st.session_state.df):,}") + st.metric("Alle Spalten", f"{len(st.session_state.df.columns):,}") + + st.divider() + + # Datentypen-Zusammenfassung + st.markdown("**Spaltentypen**") + dtype_counts = st.session_state.df.dtypes.value_counts() + for dtype, count in dtype_counts.items(): + st.caption(f"{dtype}: {count} Spalte(n)") + + st.divider() + + # Filterstatus + if st.session_state.stats is not None: + st.markdown("**Filterergebnisse**") + st.metric("Ausgabezeilen", f"{st.session_state.stats['output_rows']:,}") + st.metric("Trefferquote", f"{st.session_state.stats['retention_rate']:.1f}%") + if st.session_state.stats['filters_applied']: + st.caption(f"Filter: {', '.join(st.session_state.stats['filters_applied'])}") + + st.divider() + + # Speicherverbrauch + memory_mb = st.session_state.df.memory_usage(deep=True).sum() / 1024 / 1024 + st.caption(f"Speicher: {memory_mb:.1f} MB") + st.caption("*Version 0.1* ") + + else: + st.info("Lade eine Excel-Datei hoch, um Statistiken anzuzeigen.") + + st.divider() + st.markdown("**Anleitung**") + st.markdown(""" + 1. Excel-Datei hochladen + 2. Filter konfigurieren (optional) + 3. "Filter anwenden" klicken + 4. 
Ergebnisse herunterladen + """) + + st.title("Excel Filter Tool") + st.markdown("*TemporĂ€re Session: Es werden keine Daten und Einstellungen gespeichert!* ") + st.write("") + + main_tabs = st.tabs(["Excel-Filter", "Regex-Builder", "Hilfe"]) + with main_tabs[0]: render_pipeline_tab() + with main_tabs[1]: render_regex_builder_tab() + with main_tabs[2]: render_help_tab() + +if __name__ == "__main__": + main() \ No newline at end of file