diff --git a/src/post.wgsl b/src/post.wgsl index 9ce3cd1..c7c3bbc 100644 --- a/src/post.wgsl +++ b/src/post.wgsl @@ -28,22 +28,21 @@ fn vs_post(@builtin(vertex_index) idx: u32) -> PostOut { return out; } -// Cheap edge-aware blur — a small FXAA. Samples the center and four -// diagonal neighbors, blends toward the average where the local -// luminance gradient exceeds a threshold. Works well for voxel games -// where edges are axis-aligned and high-contrast. +// Cheap edge-aware blur. Two-tap variant: sample the pixel itself +// and one diagonal neighbor; if the luminance gradient is large +// (edge), blend toward the midpoint of the two taps (weight <= 0.8). +// Down from a 5-tap version that was costing ~4 ms/frame on software +// rasterizers. Voxel-game edges are axis-aligned and high-contrast, +// so this is sufficient — the staircase artifacts that motivated AA +// still get softened along the diagonal we sample. fn fxaa(uv: vec2<f32>, texel: vec2<f32>) -> vec3<f32> { let c = textureSample(scene_color_tex, post_sampler, uv).rgb; - let nw = textureSample(scene_color_tex, post_sampler, uv + texel * vec2(-0.5, -0.5)).rgb; - let ne = textureSample(scene_color_tex, post_sampler, uv + texel * vec2( 0.5, -0.5)).rgb; - let sw = textureSample(scene_color_tex, post_sampler, uv + texel * vec2(-0.5, 0.5)).rgb; - let se = textureSample(scene_color_tex, post_sampler, uv + texel * vec2( 0.5, 0.5)).rgb; - let avg = (nw + ne + sw + se) * 0.25; + let se = textureSample(scene_color_tex, post_sampler, uv + texel * vec2(0.5, 0.5)).rgb; let luma_w = vec3(0.299, 0.587, 0.114); - let lc = dot(c, luma_w); - let la = dot(avg, luma_w); - let edge = clamp(abs(lc - la) * 4.0, 0.0, 1.0); - return mix(c, avg, edge); + let lc = dot(c, luma_w); + let ls = dot(se, luma_w); + let edge = clamp(abs(lc - ls) * 4.0, 0.0, 0.8); + return mix(c, (c + se) * 0.5, edge); } // Narkowicz ACES filmic approximation.
Output is linear; the sRGB diff --git a/src/render/mod.rs b/src/render/mod.rs index 57a89de..fea97a7 100644 --- a/src/render/mod.rs +++ b/src/render/mod.rs @@ -66,6 +66,10 @@ pub struct Renderer { mask_bg: wgpu::BindGroup, // binds scene_color → input to mask pass shafts_bg: wgpu::BindGroup, // binds mask_view → input to shafts pass post_bind_group: wgpu::BindGroup, // binds scene + shafts → input to post + /// Whether the god-rays mask + shafts passes should run this + /// frame. Set by `upload_camera` based on sun altitude — we skip + /// the passes once the sun is below the horizon (sun.y <= -0.05). + shafts_active: std::cell::Cell<bool>, } impl Renderer { @@ -360,6 +364,7 @@ impl Renderer { mask_bg, shafts_bg, post_bind_group, + shafts_active: std::cell::Cell::new(false), } } @@ -547,6 +552,12 @@ impl Renderer { }; self.queue .write_buffer(&self.camera_buffer, 0, bytemuck::bytes_of(&uni)); + // Decide if god rays need to run this frame. The shafts + // shader already returns black when sun is below the horizon + // but we still pay the pass-setup cost. Setting this flag now + // lets render() skip the entire mask + shafts chain. + let sun = crate::sim::lighting::sun_direction(time); + self.shafts_active.set(sun.y > -0.05); } pub fn set_visible(&mut self, chunks: Vec) { @@ -554,6 +565,21 @@ impl Renderer { } pub fn render(&self) -> Result<(), wgpu::SurfaceError> { + // Decide whether the god-rays chain (mask + shafts passes) + // needs to run this frame. render() takes no time argument, + // so the decision is made in upload_camera, where the sun + // direction is computed, and stored in shafts_active. + let do_shafts = { + // The flag reflects the most recent upload_camera call. If + // render() runs without a fresh upload the flag is stale, + // but the miss case (we run shafts for one extra frame at + // horizon crossing) is invisible. + // Note: this method takes &self and is given no time value; + // recovering it from the camera uniform we uploaded + // would need read-back.
Instead we expose a flag we + // set in upload_camera. See: shafts_active. + self.shafts_active.get() + }; let frame = self.surface.get_current_texture()?; let surface_view = frame .texture @@ -625,23 +651,39 @@ impl Renderer { } } - // ---- Post chain: mask → shafts → composite. Each step is a - // full-screen-triangle pass with the same shape, so the chain - // is just three calls of run_fullscreen_pass with different - // (pipeline, target, bind groups). To add a new effect (bloom, - // motion blur, vignette), insert another row here. ---- - run_fullscreen_pass( - &mut encoder, "mask pass", &self.mask_view, - &self.mask_pipeline, - &[&self.camera_bind_group, &self.mask_bg], - Some(wgpu::Color::BLACK), - ); - run_fullscreen_pass( - &mut encoder, "shafts pass", &self.shafts_view, - &self.shafts_pipeline, - &[&self.camera_bind_group, &self.shafts_bg], - Some(wgpu::Color::BLACK), - ); + // ---- Post chain: mask → shafts → composite. ---- + // At night (sun below horizon) the mask + shafts passes are + // pure overhead — the shafts shader early-outs to black + // anyway. We skip them on the CPU side when do_shafts is + // false. The post pass still composites shafts_view every + // frame, so it must contain something sensible: the else + // branch below re-stamps it with one pass rather than + // leaving the rays from the last frame the chain ran in + // place. + if do_shafts { + run_fullscreen_pass( + &mut encoder, "mask pass", &self.mask_view, + &self.mask_pipeline, + &[&self.camera_bind_group, &self.mask_bg], + Some(wgpu::Color::BLACK), + ); + run_fullscreen_pass( + &mut encoder, "shafts pass", &self.shafts_view, + &self.shafts_pipeline, + &[&self.camera_bind_group, &self.shafts_bg], + Some(wgpu::Color::BLACK), + ); + } else { + // Stamp shafts_view to black so the composite doesn't + // inherit yesterday's rays. One pass write is much cheaper + // than running mask + shafts. NOTE(review): a fullscreen draw replaces the clear color with the pipeline's shader output — confirm the pipeline used below writes black.
+ run_fullscreen_pass( + &mut encoder, "shafts clear", &self.shafts_view, + &self.shafts_pipeline, // same pipeline/target pair as the shafts pass; its shader early-outs to black at night + &[&self.camera_bind_group, &self.shafts_bg], + Some(wgpu::Color::BLACK), + ); + } run_fullscreen_pass( &mut encoder, "post pass", &surface_view, &self.post_pipeline, diff --git a/src/shader.wgsl b/src/shader.wgsl index 18b35fd..8b40073 100644 --- a/src/shader.wgsl +++ b/src/shader.wgsl @@ -169,23 +169,29 @@ fn sky_color(dir: vec3<f32>) -> vec3<f32> { } // Sun disc + halo. The disc softens and spreads as the sun nears - // the horizon — atmospheric scattering blooms the apparent disc at - // low angles. Sharp pin-point at zenith, big soft circle at dusk. + // the horizon. Sharpness exponents reduced from 800 (zenith) / + // 160 (horizon) for weak GPUs / software rasterizers; 256/120 + // still reads as a crisp sun disc. NOTE(review): pow cost is + // usually independent of the exponent — confirm with a profile. let sun_col = sun_tint(sun); let cos_s = max(dot(dir, sun), 0.0); let alt = clamp(sun.y, 0.0, 1.0); - let disc_sharpness = mix(160.0, 800.0, alt); + let disc_sharpness = mix(120.0, 256.0, alt); let disc_intensity = mix(2.2, 1.5, alt); let disc = pow(cos_s, disc_sharpness) * disc_intensity * smoothstep(-0.05, 0.05, sun.y); let halo = pow(cos_s, mix(3.0, 5.0, alt)) * mix(0.35, 0.20, alt) * day; sky = sky + sun_col * (disc + halo); // Moon disc — opposite the sun, faint white, night only. - let moon = -sun; - let cos_m = max(dot(dir, moon), 0.0); - let moon_disc = pow(cos_m, 700.0) * 0.9; - let moon_halo = pow(cos_m, 24.0) * 0.06; - sky = sky + vec3(0.86, 0.89, 0.96) * (moon_disc + moon_halo) * night; + // Skip entirely during day: pow(cos_m, 256) is expensive and + // the moon's invisible against bright sky anyway.
+ if (night > 0.0) { // exact-zero test: the term is scaled by night, so nothing visible is dropped and the moon cannot pop in + let moon = -sun; + let cos_m = max(dot(dir, moon), 0.0); + let moon_disc = pow(cos_m, 256.0) * 0.9; + let moon_halo = pow(cos_m, 24.0) * 0.06; + sky = sky + vec3(0.86, 0.89, 0.96) * (moon_disc + moon_halo) * night; + } return sky; } diff --git a/src/shafts.wgsl b/src/shafts.wgsl index 612618d..e73854e 100644 --- a/src/shafts.wgsl +++ b/src/shafts.wgsl @@ -51,9 +51,13 @@ fn vs_shafts(@builtin(vertex_index) idx: u32) -> ShaftsOut { return out; } -const N_SAMPLES: i32 = 32; -const DECAY: f32 = 0.965; -const WEIGHT: f32 = 0.42; +// 32 → 16 samples. The earlier value was overkill — at quarter-res +// with 16-step decay the rays still trace cleanly without banding, +// and we cut the per-pixel cost in half. DECAY (0.965 → 0.94) and +// WEIGHT (0.42 → 0.78) are retuned to keep total intensity similar. +const N_SAMPLES: i32 = 16; +const DECAY: f32 = 0.94; +const WEIGHT: f32 = 0.78; const EXPOSURE: f32 = 0.30; @fragment