Fix(benchmark): Benchmark Workflow Improvements and Bug Fixes #5

Merged · 13 commits · Jun 19, 2025
Changes from all commits
47 changes: 47 additions & 0 deletions .github/scripts/check_regressions.py
@@ -0,0 +1,47 @@
"""
check_regressions.py

Compare current benchmark results to a baseline and exit nonzero if regression detected.
"""
import argparse
import sqlite3
import sys
import pandas as pd

def parse_args():
parser = argparse.ArgumentParser(description="Check for benchmark regressions.")
parser.add_argument('--db-file', required=True, help='SQLite database file')
parser.add_argument('--current-commit', required=True, help='Current commit SHA')
parser.add_argument('--baseline-branch', required=True, help='Baseline branch name')
parser.add_argument('--threshold-percentage', type=float, default=5.0, help='Regression threshold (%)')
return parser.parse_args()

def main():
args = parse_args()
conn = sqlite3.connect(args.db_file)
df = pd.read_sql_query("SELECT * FROM benchmarks", conn)
if df.empty:
print("No benchmark data found.")
sys.exit(0)
# Get current and baseline results
current = df[df['commit_sha'] == args.current_commit]
baseline = df[(df['branch'] == args.baseline_branch)]
if current.empty or baseline.empty:
print("No data for current commit or baseline branch.")
sys.exit(0)
# Compare each metric
regression_found = False
for metric in current['metric_name'].unique():
cur_val = current[current['metric_name'] == metric]['value'].mean()
base_val = baseline[baseline['metric_name'] == metric]['value'].mean()
if cur_val > base_val * (1 + args.threshold_percentage / 100):
print(f"Regression detected in {metric}: {cur_val:.3f} > {base_val:.3f} (+{args.threshold_percentage}% threshold)")
regression_found = True
conn.close()
if regression_found:
sys.exit(1)
print("No regressions detected.")
sys.exit(0)

if __name__ == '__main__':
main()
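
A quick way to exercise this script locally is to seed a throwaway database with invented numbers and run the check against it; a minimal sketch (the table layout follows benchmarks.sql below, commit SHAs and values are illustrative only):

# Local smoke test for check_regressions.py; all data here is made up.
import sqlite3
import subprocess

conn = sqlite3.connect('test.db')
conn.execute('''CREATE TABLE IF NOT EXISTS benchmarks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    commit_sha TEXT NOT NULL,
    branch TEXT NOT NULL,
    metric_category TEXT NOT NULL,
    metric_name TEXT NOT NULL,
    value REAL NOT NULL,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
)''')
rows = [
    ('aaa111', 'main', 'criterion', 'search_large_file', 100.0),     # baseline
    ('bbb222', 'feature', 'criterion', 'search_large_file', 110.0),  # 10% slower
]
conn.executemany(
    'INSERT INTO benchmarks (commit_sha, branch, metric_category, metric_name, value) VALUES (?, ?, ?, ?, ?)',
    rows)
conn.commit()
conn.close()

# With the default 5% threshold this should exit 1, since 110 > 100 * 1.05.
result = subprocess.run(['python', '.github/scripts/check_regressions.py',
                         '--db-file', 'test.db',
                         '--current-commit', 'bbb222',
                         '--baseline-branch', 'main'])
print('exit code:', result.returncode)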
47 changes: 47 additions & 0 deletions .github/scripts/generate_visualizations.py
@@ -0,0 +1,47 @@
"""
generate_visualizations.py

Generate interactive Plotly charts and Matplotlib trend lines from benchmark data.
"""
import argparse
import os
import sqlite3
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

def parse_args():
parser = argparse.ArgumentParser(description="Generate benchmark visualizations.")
parser.add_argument('--db-file', required=True, help='SQLite database file')
parser.add_argument('--output-dir', required=True, help='Directory to save reports/plots')
return parser.parse_args()

def main():
args = parse_args()
os.makedirs(args.output_dir, exist_ok=True)
conn = sqlite3.connect(args.db_file)
df = pd.read_sql_query("SELECT * FROM benchmarks", conn)
if df.empty:
print("No benchmark data found.")
return
# Plotly interactive chart (mean value by commit)
fig = px.line(df, x='timestamp', y='value', color='metric_name',
title='Benchmark Trends', markers=True, hover_data=['commit_sha', 'branch'])
fig.write_html(os.path.join(args.output_dir, 'benchmark_trends.html'))
# Matplotlib trend line (for each metric)
for metric in df['metric_name'].unique():
metric_df = df[df['metric_name'] == metric]
plt.figure(figsize=(10, 4))
plt.plot(metric_df['timestamp'], metric_df['value'], marker='o')
plt.title(f'Trend for {metric}')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.grid(True)
plt.tight_layout()
plt.savefig(os.path.join(args.output_dir, f'{metric}_trend.png'))
plt.close()
print(f"Visualizations saved to {args.output_dir}")
conn.close()

if __name__ == '__main__':
main()
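
Before generating plots, it can help to preview the exact DataFrame the script charts; a minimal sketch, assuming benchmarks/benchmarks.db already exists from an earlier run:

# Preview the data that generate_visualizations.py will chart.
import sqlite3
import pandas as pd

conn = sqlite3.connect('benchmarks/benchmarks.db')
df = pd.read_sql_query("SELECT * FROM benchmarks", conn)
conn.close()

# One row per stored sample; summary statistics per metric.
print(df.groupby('metric_name')['value'].describe())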
81 changes: 81 additions & 0 deletions .github/scripts/store_benchmarks.py
@@ -0,0 +1,81 @@
"""
store_benchmarks.py

Parse Criterion benchmark results and store them in an SQLite database.
"""
import argparse
import json
import os
import sqlite3
from datetime import datetime

def parse_args():
parser = argparse.ArgumentParser(description="Store benchmark results in SQLite DB.")
parser.add_argument('--results-dir', required=True, help='Directory with Criterion JSON results')
parser.add_argument('--db-file', required=True, help='SQLite database file')
parser.add_argument('--commit-sha', required=True, help='Git commit SHA')
parser.add_argument('--branch', required=True, help='Git branch name')
return parser.parse_args()

def ensure_tables(conn):
conn.execute('''CREATE TABLE IF NOT EXISTS benchmarks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
commit_sha TEXT NOT NULL,
branch TEXT NOT NULL,
metric_category TEXT NOT NULL,
metric_name TEXT NOT NULL,
value REAL NOT NULL,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
)''')
conn.execute('''CREATE TABLE IF NOT EXISTS runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
commit_sha TEXT NOT NULL,
branch TEXT NOT NULL,
run_time DATETIME DEFAULT CURRENT_TIMESTAMP
)''')
conn.commit()

def store_benchmarks(conn, results_dir, commit_sha, branch):
found_benchmarks = False
for root, _, files in os.walk(results_dir):
for file in files:
if file.endswith('.json'):
path = os.path.join(root, file)
with open(path) as f:
try:
data = json.load(f)
except Exception as e:
print(f"Warning: Could not parse {path}: {e}")
continue
if isinstance(data, dict) and 'benchmarks' in data:
benchmarks = data['benchmarks']
elif isinstance(data, list):
benchmarks = data
else:
continue
for bench in benchmarks:
if not isinstance(bench, dict):
continue
name = bench.get('name', 'unknown')
mean = bench.get('mean', {}).get('point_estimate')
if mean is not None:
conn.execute(
'INSERT INTO benchmarks (commit_sha, branch, metric_category, metric_name, value) VALUES (?, ?, ?, ?, ?)',
(commit_sha, branch, 'criterion', name, mean)
)
found_benchmarks = True
if not found_benchmarks:
print(f"No benchmark data found in {results_dir}. If this is the first run, this is expected.")
conn.execute('INSERT INTO runs (commit_sha, branch) VALUES (?, ?)', (commit_sha, branch))
conn.commit()

def main():
args = parse_args()
conn = sqlite3.connect(args.db_file)
ensure_tables(conn)
store_benchmarks(conn, args.results_dir, args.commit_sha, args.branch)
print(f"Benchmarks stored for commit {args.commit_sha} on branch {args.branch}.")
conn.close()

if __name__ == '__main__':
main()
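
The parser accepts two JSON shapes: an object with a top-level 'benchmarks' array, or a bare array of entries, each carrying a name plus a mean.point_estimate. A sketch of seeding and ingesting a file in that shape (the layout mirrors what the script reads, not necessarily Criterion's exact on-disk format; all values are invented):

# Write a sample results file in the shape store_benchmarks.py understands,
# then ingest it and inspect the resulting rows.
import json
import os
import sqlite3
import subprocess

os.makedirs('sample_results', exist_ok=True)
sample = {
    "benchmarks": [
        {"name": "search_plain_text", "mean": {"point_estimate": 0.0123}},
        {"name": "search_regex", "mean": {"point_estimate": 0.0456}},
    ]
}
with open('sample_results/results.json', 'w') as f:
    json.dump(sample, f)

subprocess.run(['python', '.github/scripts/store_benchmarks.py',
                '--results-dir', 'sample_results',
                '--db-file', 'benchmarks.db',
                '--commit-sha', 'deadbeef',
                '--branch', 'main'], check=True)

conn = sqlite3.connect('benchmarks.db')
print(conn.execute('SELECT metric_name, value FROM benchmarks').fetchall())
conn.close()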
69 changes: 63 additions & 6 deletions .github/workflows/bench.yml
@@ -13,11 +13,10 @@ jobs:
       - uses: actions/checkout@v4

       - name: Setup Rust
-        uses: dtolnay/rust-toolchain@master
+        uses: dtolnay/rust-toolchain@stable
         with:
-          toolchain: nightly
-          components: llvm-tools-preview, rustfmt, clippy
-
+          components: llvm-tools-preview
       - name: Cache
         uses: actions/cache@v3
         with:
@@ -26,17 +25,75 @@
           ~/.cargo
           target

+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install Python dependencies
+        run: |
+          pip install plotly matplotlib pandas sqlalchemy # Add any other dependencies your scripts need
+          # Example: pip install -r .github/scripts/requirements.txt
+
+      - name: Install competitors
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y ripgrep fd-find grep
+
+      - name: Download previous benchmark database
+        uses: actions/download-artifact@v4
+        with:
+          name: benchmark-database
+          path: benchmarks/
+        continue-on-error: true
+
       - name: Run benchmarks
         run: |
           cargo bench

-      - name: Upload results
+      - name: Process benchmarks and update database
+        run: |
+          # This script parses target/criterion, correlates with git info, and updates benchmarks/benchmarks.db
+          # Ensure benchmarks/ directory exists if the script doesn't create it
+          mkdir -p benchmarks
+          python .github/scripts/store_benchmarks.py \
+            --results-dir target/criterion \
+            --db-file benchmarks/benchmarks.db \
+            --commit-sha ${{ github.sha }} \
+            --branch ${{ github.ref_name }}
+        # env:
+        #   GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Generate benchmark visualizations
+        run: |
+          # This script queries benchmarks/benchmarks.db and generates reports/plots
+          mkdir -p benchmark_reports
+          python .github/scripts/generate_visualizations.py \
+            --db-file benchmarks/benchmarks.db \
+            --output-dir ./benchmark_reports
+
+      - name: Check for performance regressions
+        run: |
+          # This script compares current benchmarks against a baseline from the DB
+          # It should exit with a non-zero status code if a regression is detected
+          python .github/scripts/check_regressions.py \
+            --db-file benchmarks/benchmarks.db \
+            --current-commit ${{ github.sha }} \
+            --baseline-branch main # Or configure as needed (e.g., previous successful run)
+          # --threshold-percentage 5 # Example: fail if 5% slower
+
+      - name: Upload benchmark artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-artifacts-${{ github.run_id }}
+          path: |
+            target/criterion
+            benchmark_reports/
+          retention-days: 90
+
+      - name: Upload persistent benchmark database
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-results
-          path: target/criterion
+          name: benchmark-database
+          path: benchmarks/benchmarks.db
+          retention-days: 365
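
The store → visualize → check sequence above can be reproduced locally once cargo bench has populated target/criterion; a minimal sketch (the 'local' commit/branch labels are placeholders):

# Run the store -> visualize -> regression-check pipeline locally,
# mirroring the workflow steps above.
import os
import subprocess

os.makedirs('benchmarks', exist_ok=True)

subprocess.run(['python', '.github/scripts/store_benchmarks.py',
                '--results-dir', 'target/criterion',
                '--db-file', 'benchmarks/benchmarks.db',
                '--commit-sha', 'local',
                '--branch', 'local'], check=True)

subprocess.run(['python', '.github/scripts/generate_visualizations.py',
                '--db-file', 'benchmarks/benchmarks.db',
                '--output-dir', 'benchmark_reports'], check=True)

# Raises CalledProcessError if any metric is more than 5% above the baseline mean.
subprocess.run(['python', '.github/scripts/check_regressions.py',
                '--db-file', 'benchmarks/benchmarks.db',
                '--current-commit', 'local',
                '--baseline-branch', 'main'], check=True)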
1 change: 1 addition & 0 deletions Cargo.toml
@@ -50,6 +50,7 @@ serde = { version = "1.0", features = ["derive"] }
 walkdir = "2.3"
 simplelog = "0.12"
 numfmt = "1.1.1"
+memmap2 = "0.9"

 # Development dependencies
 [dev-dependencies]
19 changes: 19 additions & 0 deletions benchmarks/benchmarks.sql
@@ -0,0 +1,19 @@
-- SQLite DB for benchmark tracking
-- Table: benchmarks
CREATE TABLE IF NOT EXISTS benchmarks (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    commit_sha TEXT NOT NULL,
    branch TEXT NOT NULL,
    metric_category TEXT NOT NULL,
    metric_name TEXT NOT NULL,
    value REAL NOT NULL,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- Table: runs
CREATE TABLE IF NOT EXISTS runs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    commit_sha TEXT NOT NULL,
    branch TEXT NOT NULL,
    run_time DATETIME DEFAULT CURRENT_TIMESTAMP
);
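
A typical query against this schema aggregates values per branch and metric, which is essentially what the regression check compares; a sketch using the stdlib sqlite3 driver, assuming the database lives at benchmarks/benchmarks.db:

# Average value and sample count per (branch, metric).
import sqlite3

conn = sqlite3.connect('benchmarks/benchmarks.db')
query = '''
    SELECT branch, metric_name, AVG(value) AS mean_value, COUNT(*) AS samples
    FROM benchmarks
    GROUP BY branch, metric_name
    ORDER BY metric_name, branch
'''
for row in conn.execute(query):
    print(row)
conn.close()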
6 changes: 5 additions & 1 deletion src/main.rs
@@ -42,7 +42,11 @@ fn main() -> Result<()> {
             extensions: _,
             recursive,
         } => {
-            let regex = build_regex(pattern, mode)?;
+            let regex = if matches!(mode, SearchMode::Regex) {
+                processor::get_or_compile_regex(pattern)?
+            } else {
+                build_regex(pattern, mode)?
+            };
             let matches = Mutex::new(Vec::new());

             let files: Vec<_> = walk_dir(&cli.path, *recursive, false)
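
processor::get_or_compile_regex itself is not part of this diff; to illustrate the caching idea it presumably implements (compile each pattern once, reuse it on repeat lookups), here is a minimal Python sketch, not the actual Rust helper:

# Illustrative only: memoize compiled patterns so repeated searches with the
# same pattern skip recompilation.
import re
from functools import lru_cache

@lru_cache(maxsize=None)
def get_or_compile_regex(pattern: str) -> re.Pattern:
    return re.compile(pattern)

r1 = get_or_compile_regex(r'\bfn\s+\w+')
r2 = get_or_compile_regex(r'\bfn\s+\w+')
assert r1 is r2  # second call hits the cache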