Fix(benchmark): Benchmark Workflow Improvements and Bug Fixes #5

Merged · 13 commits · Jun 19, 2025
Changes from all commits
47 changes: 47 additions & 0 deletions .github/scripts/check_regressions.py
@@ -0,0 +1,47 @@
"""
check_regressions.py

Compare current benchmark results to a baseline and exit nonzero if regression detected.
"""
import argparse
import sqlite3
import sys
import pandas as pd

def parse_args():
parser = argparse.ArgumentParser(description="Check for benchmark regressions.")
parser.add_argument('--db-file', required=True, help='SQLite database file')
parser.add_argument('--current-commit', required=True, help='Current commit SHA')
parser.add_argument('--baseline-branch', required=True, help='Baseline branch name')
parser.add_argument('--threshold-percentage', type=float, default=5.0, help='Regression threshold (%)')
return parser.parse_args()

def main():
args = parse_args()
conn = sqlite3.connect(args.db_file)
df = pd.read_sql_query("SELECT * FROM benchmarks", conn)
if df.empty:
print("No benchmark data found.")
sys.exit(0)
# Get current and baseline results
current = df[df['commit_sha'] == args.current_commit]
baseline = df[(df['branch'] == args.baseline_branch)]
if current.empty or baseline.empty:
print("No data for current commit or baseline branch.")
sys.exit(0)
# Compare each metric
regression_found = False
for metric in current['metric_name'].unique():
cur_val = current[current['metric_name'] == metric]['value'].mean()
base_val = baseline[baseline['metric_name'] == metric]['value'].mean()
if cur_val > base_val * (1 + args.threshold_percentage / 100):
print(f"Regression detected in {metric}: {cur_val:.3f} > {base_val:.3f} (+{args.threshold_percentage}% threshold)")
regression_found = True
conn.close()
if regression_found:
sys.exit(1)
print("No regressions detected.")
sys.exit(0)

if __name__ == '__main__':
main()
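
A quick way to exercise this script locally is to seed a throwaway database with invented numbers and run the check against it; a minimal sketch (the table layout follows benchmarks.sql below, commit SHAs and values are illustrative only):

# Local smoke test for check_regressions.py; all data here is made up.
import sqlite3
import subprocess

conn = sqlite3.connect('test.db')
conn.execute('''CREATE TABLE IF NOT EXISTS benchmarks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    commit_sha TEXT NOT NULL,
    branch TEXT NOT NULL,
    metric_category TEXT NOT NULL,
    metric_name TEXT NOT NULL,
    value REAL NOT NULL,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
)''')
rows = [
    ('aaa111', 'main', 'criterion', 'search_large_file', 100.0),     # baseline
    ('bbb222', 'feature', 'criterion', 'search_large_file', 110.0),  # 10% slower
]
conn.executemany(
    'INSERT INTO benchmarks (commit_sha, branch, metric_category, metric_name, value) VALUES (?, ?, ?, ?, ?)',
    rows)
conn.commit()
conn.close()

# With the default 5% threshold this should exit 1, since 110 > 100 * 1.05.
result = subprocess.run(['python', '.github/scripts/check_regressions.py',
                         '--db-file', 'test.db',
                         '--current-commit', 'bbb222',
                         '--baseline-branch', 'main'])
print('exit code:', result.returncode)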
47 changes: 47 additions & 0 deletions .github/scripts/generate_visualizations.py
@@ -0,0 +1,47 @@
"""
generate_visualizations.py

Generate interactive Plotly charts and Matplotlib trend lines from benchmark data.
"""
import argparse
import os
import sqlite3
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

def parse_args():
parser = argparse.ArgumentParser(description="Generate benchmark visualizations.")
parser.add_argument('--db-file', required=True, help='SQLite database file')
parser.add_argument('--output-dir', required=True, help='Directory to save reports/plots')
return parser.parse_args()

def main():
args = parse_args()
os.makedirs(args.output_dir, exist_ok=True)
conn = sqlite3.connect(args.db_file)
df = pd.read_sql_query("SELECT * FROM benchmarks", conn)
if df.empty:
print("No benchmark data found.")
return
# Plotly interactive chart (mean value by commit)
fig = px.line(df, x='timestamp', y='value', color='metric_name',
title='Benchmark Trends', markers=True, hover_data=['commit_sha', 'branch'])
fig.write_html(os.path.join(args.output_dir, 'benchmark_trends.html'))
# Matplotlib trend line (for each metric)
for metric in df['metric_name'].unique():
metric_df = df[df['metric_name'] == metric]
plt.figure(figsize=(10, 4))
plt.plot(metric_df['timestamp'], metric_df['value'], marker='o')
plt.title(f'Trend for {metric}')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.grid(True)
plt.tight_layout()
plt.savefig(os.path.join(args.output_dir, f'{metric}_trend.png'))
plt.close()
print(f"Visualizations saved to {args.output_dir}")
conn.close()

if __name__ == '__main__':
main()
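
Before generating plots, it can help to preview the exact DataFrame the script charts; a minimal sketch, assuming benchmarks/benchmarks.db already exists from an earlier run:

# Preview the data that generate_visualizations.py will chart.
import sqlite3
import pandas as pd

conn = sqlite3.connect('benchmarks/benchmarks.db')
df = pd.read_sql_query("SELECT * FROM benchmarks", conn)
conn.close()

# One row per stored sample; summary statistics per metric.
print(df.groupby('metric_name')['value'].describe())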
81 changes: 81 additions & 0 deletions .github/scripts/store_benchmarks.py
@@ -0,0 +1,81 @@
"""
store_benchmarks.py

Parse Criterion benchmark results and store them in an SQLite database.
"""
import argparse
import json
import os
import sqlite3
from datetime import datetime

def parse_args():
parser = argparse.ArgumentParser(description="Store benchmark results in SQLite DB.")
parser.add_argument('--results-dir', required=True, help='Directory with Criterion JSON results')
parser.add_argument('--db-file', required=True, help='SQLite database file')
parser.add_argument('--commit-sha', required=True, help='Git commit SHA')
parser.add_argument('--branch', required=True, help='Git branch name')
return parser.parse_args()

def ensure_tables(conn):
conn.execute('''CREATE TABLE IF NOT EXISTS benchmarks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
commit_sha TEXT NOT NULL,
branch TEXT NOT NULL,
metric_category TEXT NOT NULL,
metric_name TEXT NOT NULL,
value REAL NOT NULL,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
)''')
conn.execute('''CREATE TABLE IF NOT EXISTS runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
commit_sha TEXT NOT NULL,
branch TEXT NOT NULL,
run_time DATETIME DEFAULT CURRENT_TIMESTAMP
)''')
conn.commit()

def store_benchmarks(conn, results_dir, commit_sha, branch):
found_benchmarks = False
for root, _, files in os.walk(results_dir):
for file in files:
if file.endswith('.json'):
path = os.path.join(root, file)
with open(path) as f:
try:
data = json.load(f)
except Exception as e:
print(f"Warning: Could not parse {path}: {e}")
continue
if isinstance(data, dict) and 'benchmarks' in data:
benchmarks = data['benchmarks']
elif isinstance(data, list):
benchmarks = data
else:
continue
for bench in benchmarks:
if not isinstance(bench, dict):
continue
name = bench.get('name', 'unknown')
mean = bench.get('mean', {}).get('point_estimate')
if mean is not None:
conn.execute(
'INSERT INTO benchmarks (commit_sha, branch, metric_category, metric_name, value) VALUES (?, ?, ?, ?, ?)',
(commit_sha, branch, 'criterion', name, mean)
)
found_benchmarks = True
if not found_benchmarks:
print(f"No benchmark data found in {results_dir}. If this is the first run, this is expected.")
conn.execute('INSERT INTO runs (commit_sha, branch) VALUES (?, ?)', (commit_sha, branch))
conn.commit()

def main():
args = parse_args()
conn = sqlite3.connect(args.db_file)
ensure_tables(conn)
store_benchmarks(conn, args.results_dir, args.commit_sha, args.branch)
print(f"Benchmarks stored for commit {args.commit_sha} on branch {args.branch}.")
conn.close()

if __name__ == '__main__':
main()
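
The parser accepts two JSON shapes: an object with a top-level 'benchmarks' array, or a bare array of entries, each carrying a name plus a mean.point_estimate. A sketch of seeding and ingesting a file in that shape (the layout mirrors what the script reads, not necessarily Criterion's exact on-disk format; all values are invented):

# Write a sample results file in the shape store_benchmarks.py understands,
# then ingest it and inspect the resulting rows.
import json
import os
import sqlite3
import subprocess

os.makedirs('sample_results', exist_ok=True)
sample = {
    "benchmarks": [
        {"name": "search_plain_text", "mean": {"point_estimate": 0.0123}},
        {"name": "search_regex", "mean": {"point_estimate": 0.0456}},
    ]
}
with open('sample_results/results.json', 'w') as f:
    json.dump(sample, f)

subprocess.run(['python', '.github/scripts/store_benchmarks.py',
                '--results-dir', 'sample_results',
                '--db-file', 'benchmarks.db',
                '--commit-sha', 'deadbeef',
                '--branch', 'main'], check=True)

conn = sqlite3.connect('benchmarks.db')
print(conn.execute('SELECT metric_name, value FROM benchmarks').fetchall())
conn.close()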
69 changes: 63 additions & 6 deletions .github/workflows/bench.yml
@@ -13,11 +13,10 @@ jobs:
       - uses: actions/checkout@v4

       - name: Setup Rust
-        uses: dtolnay/rust-toolchain@master
+        uses: dtolnay/rust-toolchain@stable
         with:
-          toolchain: nightly
-          components: llvm-tools-preview, rustfmt, clippy
-
+          components: llvm-tools-preview
       - name: Cache
         uses: actions/cache@v3
         with:
@@ -26,17 +25,75 @@
           ~/.cargo
           target

+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install Python dependencies
+        run: |
+          pip install plotly matplotlib pandas sqlalchemy # Add any other dependencies your scripts need
+          # Example: pip install -r .github/scripts/requirements.txt
+
+      - name: Install competitors
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ripgrep fd-find grep
+
+      - name: Download previous benchmark database
+        uses: actions/download-artifact@v4
+        with:
+          name: benchmark-database
+          path: benchmarks/
+        continue-on-error: true
+
       - name: Run benchmarks
         run: |
           cargo bench

-      - name: Upload results
+      - name: Process benchmarks and update database
+        run: |
+          # This script parses target/criterion, correlates with git info, and updates benchmarks/benchmarks.db
+          # Ensure benchmarks/ directory exists if the script doesn't create it
+          mkdir -p benchmarks
+          python .github/scripts/store_benchmarks.py \
+            --results-dir target/criterion \
+            --db-file benchmarks/benchmarks.db \
+            --commit-sha ${{ github.sha }} \
+            --branch ${{ github.ref_name }}
+        # env:
+        #   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Generate benchmark visualizations
+        run: |
+          # This script queries benchmarks/benchmarks.db and generates reports/plots
+          mkdir -p benchmark_reports
+          python .github/scripts/generate_visualizations.py \
+            --db-file benchmarks/benchmarks.db \
+            --output-dir ./benchmark_reports
+
+      - name: Check for performance regressions
+        run: |
+          # This script compares current benchmarks against a baseline from the DB
+          # It should exit with a non-zero status code if a regression is detected
+          python .github/scripts/check_regressions.py \
+            --db-file benchmarks/benchmarks.db \
+            --current-commit ${{ github.sha }} \
+            --baseline-branch main # Or configure as needed (e.g., previous successful run)
+          # --threshold-percentage 5 # Example: fail if 5% slower
+
+      - name: Upload benchmark artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-artifacts-${{ github.run_id }}
+          path: |
+            target/criterion
+            benchmark_reports/
+          retention-days: 90
+
+      - name: Upload persistent benchmark database
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-results
-          path: target/criterion
+          name: benchmark-database
+          path: benchmarks/benchmarks.db
+          retention-days: 365
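
The store → visualize → check sequence above can be reproduced locally once cargo bench has populated target/criterion; a minimal sketch (the 'local' commit/branch labels are placeholders):

# Run the store -> visualize -> regression-check pipeline locally,
# mirroring the workflow steps above.
import os
import subprocess

os.makedirs('benchmarks', exist_ok=True)

subprocess.run(['python', '.github/scripts/store_benchmarks.py',
                '--results-dir', 'target/criterion',
                '--db-file', 'benchmarks/benchmarks.db',
                '--commit-sha', 'local',
                '--branch', 'local'], check=True)

subprocess.run(['python', '.github/scripts/generate_visualizations.py',
                '--db-file', 'benchmarks/benchmarks.db',
                '--output-dir', 'benchmark_reports'], check=True)

# Raises CalledProcessError if any metric is more than 5% above the baseline mean.
subprocess.run(['python', '.github/scripts/check_regressions.py',
                '--db-file', 'benchmarks/benchmarks.db',
                '--current-commit', 'local',
                '--baseline-branch', 'main'], check=True)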
1 change: 1 addition & 0 deletions Cargo.toml
@@ -50,6 +50,7 @@ serde = { version = "1.0", features = ["derive"] }
 walkdir = "2.3"
 simplelog = "0.12"
 numfmt = "1.1.1"
+memmap2 = "0.9"

 # Development dependencies
 [dev-dependencies]
19 changes: 19 additions & 0 deletions benchmarks/benchmarks.sql
@@ -0,0 +1,19 @@
-- SQLite DB for benchmark tracking
-- Table: benchmarks
CREATE TABLE IF NOT EXISTS benchmarks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    commit_sha TEXT NOT NULL,
    branch TEXT NOT NULL,
    metric_category TEXT NOT NULL,
    metric_name TEXT NOT NULL,
    value REAL NOT NULL,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- Table: runs
CREATE TABLE IF NOT EXISTS runs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    commit_sha TEXT NOT NULL,
    branch TEXT NOT NULL,
    run_time DATETIME DEFAULT CURRENT_TIMESTAMP
);
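
A typical query against this schema aggregates values per branch and metric, which is essentially what the regression check compares; a sketch using the stdlib sqlite3 driver, assuming the database lives at benchmarks/benchmarks.db:

# Average value and sample count per (branch, metric).
import sqlite3

conn = sqlite3.connect('benchmarks/benchmarks.db')
query = '''
    SELECT branch, metric_name, AVG(value) AS mean_value, COUNT(*) AS samples
    FROM benchmarks
    GROUP BY branch, metric_name
    ORDER BY metric_name, branch
'''
for row in conn.execute(query):
    print(row)
conn.close()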
6 changes: 5 additions & 1 deletion src/main.rs
@@ -42,7 +42,11 @@ fn main() -> Result<()> {
             extensions: _,
             recursive,
         } => {
-            let regex = build_regex(pattern, mode)?;
+            let regex = if matches!(mode, SearchMode::Regex) {
+                processor::get_or_compile_regex(pattern)?
+            } else {
+                build_regex(pattern, mode)?
+            };
             let matches = Mutex::new(Vec::new());

             let files: Vec<_> = walk_dir(&cli.path, *recursive, false)
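
processor::get_or_compile_regex itself is not part of this diff; to illustrate the caching idea it presumably implements (compile each pattern once, reuse it on repeat lookups), here is a minimal Python sketch, not the actual Rust helper:

# Illustrative only: memoize compiled patterns so repeated searches with the
# same pattern skip recompilation.
import re
from functools import lru_cache

@lru_cache(maxsize=None)
def get_or_compile_regex(pattern: str) -> re.Pattern:
    return re.compile(pattern)

r1 = get_or_compile_regex(r'\bfn\s+\w+')
r2 = get_or_compile_regex(r'\bfn\s+\w+')
assert r1 is r2  # second call hits the cache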