8000 [Vello Hybrid]: Clipping (Spatiotemporal Allocation) by taj-p · Pull Request #957 · linebender/vello · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

[Vello Hybrid]: Clipping (Spatiotemporal Allocation) #957

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
May 7, 2025
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 1 addition & 2 deletions sparse_strips/vello_dev_macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ pub fn vello_test(attr: TokenStream, item: TokenStream) -> TokenStream {

// These tests currently don't work with `vello_hybrid`.
skip_hybrid |= {
input_fn_name_str.contains("clip")
|| input_fn_name_str.contains("compose")
input_fn_name_str.contains("compose")
|| input_fn_name_str.contains("gradient")
|| input_fn_name_str.contains("image")
|| input_fn_name_str.contains("layer")
Expand Down
3 changes: 2 additions & 1 deletion sparse_strips/vello_hybrid/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ publish = false
workspace = true

[dependencies]
vello_common = { workspace = true }
bytemuck = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
vello_common = { workspace = true }
wgpu = { workspace = true }

[dev-dependencies]
Expand Down
29 changes: 10 additions & 19 deletions sparse_strips/vello_hybrid/examples/render_to_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use vello_common::kurbo::{Affine, Stroke};
use vello_common::pico_svg::{Item, PicoSvg};
use vello_common::pixmap::Pixmap;
use vello_hybrid::{DimensionConstraints, Scene};
use wgpu::RenderPassDescriptor;

/// Main entry point for the headless rendering example.
/// Takes two command line arguments:
Expand Down Expand Up @@ -91,28 +90,20 @@ async fn run() {
width: width.into(),
height: height.into(),
};
renderer.prepare(&device, &queue, &scene, &render_size);
// Copy texture to buffer
let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Vello Render To Buffer"),
});
{
let mut pass = encoder.begin_render_pass(&RenderPassDescriptor {
label: Some("Render Pass"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: &texture_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color::TRANSPARENT),
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
occlusion_query_set: None,
timestamp_writes: None,
});
renderer.render(&scene, &mut pass);
}
renderer
.render(
&scene,
&device,
&queue,
&mut encoder,
&render_size,
&texture_view,
)
.unwrap();

// Create a buffer to copy the texture data
let bytes_per_row = (u32::from(width) * 4).next_multiple_of(256);
Expand Down
69 changes: 23 additions & 46 deletions sparse_strips/vello_hybrid/examples/webgl/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,13 +170,6 @@ impl AppState {
height: self.height,
};

self.renderer_wrapper.renderer.prepare(
&self.renderer_wrapper.device,
&self.renderer_wrapper.queue,
&self.scene,
&render_size,
);

let surface_texture = self.renderer_wrapper.surface.get_current_texture().unwrap();
let surface_texture_view = surface_texture
.texture
Expand All @@ -186,26 +179,18 @@ impl AppState {
.renderer_wrapper
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
{
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: &surface_texture_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
occlusion_query_set: None,
timestamp_writes: None,
});

self.renderer_wrapper
.renderer
.render(&self.scene, &mut pass);
}

self.renderer_wrapper
.renderer
.render(
&self.scene,
&self.renderer_wrapper.device,
&self.renderer_wrapper.queue,
&mut encoder,
&render_size,
&surface_texture_view,
)
.unwrap();

self.renderer_wrapper.queue.submit([encoder.finish()]);
surface_texture.present();
Expand Down Expand Up @@ -504,32 +489,24 @@ pub async fn render_scene(scene: vello_hybrid::Scene, width: u16, height: u16) {
width: width as u32,
height: height as u32,
};
renderer.prepare(&device, &queue, &scene, &render_size);

let surface_texture = surface.get_current_texture().unwrap();
let surface_texture_view = surface_texture
.texture
.create_view(&wgpu::TextureViewDescriptor::default());

let mut encoder =
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
{
let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: &surface_texture_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
occlusion_query_set: None,
timestamp_writes: None,
});
renderer.render(&scene, &mut pass);
}

renderer
.render(
&scene,
&device,
&queue,
&mut encoder,
&render_size,
&surface_texture_view,
)
.unwrap();

queue.submit([encoder.finish()]);
surface_texture.present();
Expand Down
39 changes: 12 additions & 27 deletions sparse_strips/vello_hybrid/examples/winit/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use vello_common::color::{AlphaColor, Srgb};
use vello_common::kurbo::{Affine, Vec2};
use vello_hybrid::{RenderSize, Renderer, Scene};
use vello_hybrid_scenes::{AnyScene, get_example_scenes};
use wgpu::RenderPassDescriptor;
use winit::{
application::ApplicationHandler,
event::{ElementState, KeyEvent, MouseButton, MouseScrollDelta, WindowEvent},
Expand Down Expand Up @@ -271,12 +270,6 @@ impl ApplicationHandler for App<'_> {
width: surface.config.width,
height: surface.config.height,
};
self.renderers[surface.dev_id].as_mut().unwrap().prepare(
&device_handle.device,
&device_handle.queue,
&self.scene,
&render_size,
);

let surface_texture = surface
.surface
Expand All @@ -293,26 +286,18 @@ impl ApplicationHandler for App<'_> {
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("Vello Render to Surface pass"),
});
{
let mut pass = encoder.begin_render_pass(&RenderPassDescriptor {
label: Some("Render to Texture Pass"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: &texture_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
store: wgpu::StoreOp::Store,
},
})],
depth_stencil_attachment: None,
occlusion_query_set: None,
timestamp_writes: None,
});
self.renderers[surface.dev_id]
.as_mut()
.unwrap()
.render(&self.scene, &mut pass);
}
self.renderers[surface.dev_id]
.as_mut()
.unwrap()
.render(
&self.scene,
&device_handle.device,
&device_handle.queue,
&mut encoder,
&render_size,
&texture_view,
)
.unwrap();

device_handle.queue.submit([encoder.finish()]);
surface_texture.present();
Expand Down
51 changes: 51 additions & 0 deletions sparse_strips/vello_hybrid/shaders/clear_slots.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2025 the Vello Authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

// This shader clears specific slots in slot textures to transparent pixels.

// Assumes this texture consists of a single column of slots, each `config.slot_height` pixels tall,
// numbered from 0 to `texture_height / slot_height - 1` from top to bottom.

// Layout parameters for the slot texture being cleared. Bound as a uniform at
// group 0, binding 0 and read by the vertex stage to position each slot's quad.
struct Config {
// Width of a slot in pixels (matching `WideTile::WIDTH` and the width of a slot texture).
slot_width: u32,
// Height of a slot in pixels (matching `Tile::HEIGHT`).
slot_height: u32,
// Total height of the texture in pixels (slot_height * number_of_slots).
texture_height: u32,
// Unused; pads the struct to 16 bytes for alignment.
_padding: u32,
}

@group(0) @binding(0)
var<uniform> config: Config;

// Vertex stage: emits one corner of the quad that covers slot `index`.
//
// `vertex_index` (0-3) selects the corner; `index` is the slot number read from
// vertex attribute location 0. Returns the corner position in NDC.
@vertex
fn vs_main(
    @builtin(vertex_index) vertex_index: u32,
    @location(0) index: u32,
) -> @builtin(position) vec4<f32> {
    // Unit-quad corner taken from the low two bits of the vertex index:
    // 0 → (0,0), 1 → (1,0), 2 → (0,1), 3 → (1,1)
    let corner = vec2<f32>(f32(vertex_index & 1u), f32(vertex_index >> 1u));

    let slot_w = f32(config.slot_width);
    let slot_h = f32(config.slot_height);
    let tex_h = f32(config.texture_height);

    // Pixel-space position of this corner; the slot's top edge is at `index * slot_height`.
    let slot_top = f32(index * config.slot_height);
    let px = corner.x * slot_w;
    let py = slot_top + corner.y * slot_h;

    // Pixel space → NDC. Y is flipped so that pixel row 0 maps to the top (NDC y = 1).
    return vec4<f32>(px * 2.0 / slot_w - 1.0, 1.0 - py * 2.0 / tex_h, 0.0, 1.0);
}

// Fragment stage: outputs transparent black for every fragment of the slot quad.
// The `position` builtin is accepted but not read.
@fragment
fn fs_main(@builtin(position) position: vec4<f32>) -> @location(0) vec4<f32> {
    // Splat constructor: all four channels are zero, i.e. fully transparent.
    return vec4<f32>(0.0);
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@
//
// The alpha values are stored in a texture and sampled during fragment shading.
// This approach optimizes memory usage by only storing alpha data where needed.
//
// The `StripInstance`'s `rgba_or_slot` field can either encode a color or a slot index.
// If the alpha value is non-zero, the fragment shader samples the alpha texture.
// Otherwise, the fragment shader samples the source clip texture using the given slot index.

struct Config {
// Width of the rendering target
// Width of the rendering target
width: u32,
// Height of the rendering target
height: u32,
// Height of a strip in the rendering
// CAUTION: When changing this value, you must also update the fragment shader's
// logic to handle the new strip height.
strip_height: u32,
// Number of trailing zeros in alphas_tex_width (log2 of width).
// Pre-calculated on CPU since WebGL2 doesn't support `firstTrailingBit`.
Expand All @@ -29,17 +35,17 @@ struct StripInstance {
@location(1) widths: u32,
// Alpha texture column index where this strip's alpha values begin
@location(2) col: u32,
// [r, g, b, a] packed as u8's
@location(3) rgba: u32,
// [r, g, b, a] packed as u8's or a slot index when alpha is 0
@location(3) rgba_or_slot: u32,
}

struct VertexOutput {
// Texture coordinates for the current fragment
// Texture coordinates for the current fragment
@location(0) tex_coord: vec2<f32>,
// Ending x-position of the dense (alpha) region
@location(1) @interpolate(flat) dense_end: u32,
// RGBA color value
@location(2) @interpolate(flat) color: u32,
// Color value or slot index when alpha is 0
@location(2) @interpolate(flat) rgba_or_slot: u32,
// Normalized device coordinates (NDC) for the current vertex
@builtin(position) position: vec4<f32>,
};
Expand Down Expand Up @@ -77,21 +83,22 @@ fn vs_main(

out.position = vec4<f32>(ndc_x, ndc_y, 0.0, 1.0);
out.tex_coord = vec2<f32>(f32(instance.col) + x * f32(width), y * f32(config.strip_height));
out.color = instance.rgba;
out.rgba_or_slot = instance.rgba_or_slot;
return out;
}

@group(0) @binding(0)
var alphas_texture: texture_2d<u32>;

@group(0) @binding(2)
var clip_input_texture: texture_2d<f32>;

@fragment
fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
let x = u32(floor(in.tex_coord.x));
var alpha = 1.0;
// Determine if the current fragment is within the dense (alpha) region
// If so, sample the alpha value from the texture; otherwise, alpha remains fully opaque (1.0)
// TODO: This is a branch, but we can make it branchless by using a select
// would it be faster to do a texture lookup for every pixel?
if x < in.dense_end {
let y = u32(floor(in.tex_coord.y));
// Retrieve alpha value from the texture. We store 16 1-byte alpha
Expand All @@ -108,18 +115,28 @@ fn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {
let channel_index = alphas_index % 4u;
// Calculate texel coordinates
let tex_x = texel_index & (alphas_tex_width - 1u);
let tex_y = texel_index >> config.alphas_tex_width_bits;
let tex_y = texel_index >> config.alphas_tex_width_bits;

// Load all 4 channels from the texture
let rgba_values = textureLoad(alphas_texture, vec2<u32>(tex_x, tex_y), 0);

// Get the column's alphas from the appropriate RGBA channel based on the index
let alphas_u32 = unpack_alphas_from_channel(rgba_values, channel_index);
// Extract the alpha value for the current y-position from the packed u32 data
alpha = f32((alphas_u32 >> (y * 8u)) & 0xffu) * (1.0 / 255.0);
}
// Apply the alpha value to the unpacked RGBA color
return alpha * unpack4x8unorm(in.color);
// Apply the alpha value to the unpacked RGBA color or slot index
let alpha_byte = in.rgba_or_slot >> 24u;
if alpha_byte != 0 {
// in.rgba_or_slot encodes a color
return alpha * unpack4x8unorm(in.rgba_or_slot);
} else {
// in.rgba_or_slot encodes a slot in the source clip texture
let clip_x = u32(in.position.x) & 0xFFu;
let clip_y = (u32(in.position.y) & 3) + in.rgba_or_slot * config.strip_height;
Comment on lines +135 to +136

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not clear to me how in.position.y has been modified in this PR. From the scheduler, it looks like it also contains the slot_ix * Tile::HEIGHT, or slot y position. So why the & 3?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the answer to this question should also potentially result in a code comment in the wgsl shader.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in.position represents the position builtin. In a vertex shader, it ranges from -1 to 1 for X and Y. In the context of a fragment shader, it represents the pixel coordinate of where we are drawing (see this article). The & 3 masks the pixel y-coordinate to the range 0–3 (i.e. y modulo 4), since 4 is the height of our tile. I added a CAUTION: note to the config.strip_height about the danger in changing its value without updating this logic.

In time, we will want to make this configurable, but I'm not sure how that will present. We could make the & 3 configurable, but then we should also make the & 0xFFu configurable to wide tile width. I think at this stage we should untangle those concerns when we get to them.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh whoops, I got my x and y's confused! Thanks for the great answer :D

let clip_in_color = textureLoad(clip_input_texture, vec2(clip_x, clip_y), 0);
return alpha * clip_in_color;
}
}

fn unpack_alphas_from_channel(rgba: vec4<u32>, channel_index: u32) -> u32 {
Expand All @@ -136,6 +153,7 @@ fn unpack_alphas_from_channel(rgba: vec4<u32>, channel_index: u32) -> u32 {
// Polyfills `unpack4x8unorm`.
//
// Downlevel targets do not support native WGSL `unpack4x8unorm`.
// TODO: Remove once we upgrade to WGPU 25.
fn unpack4x8unorm(rgba_packed: u32) -> vec4<f32> {
// Extract each byte and convert to float in range [0,1]
return vec4<f32>(
Expand Down
Loading
0