diff --git a/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx b/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx index 9ad68e8d18..c4113f83e1 100644 --- a/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx +++ b/apps/typegpu-docs/src/content/docs/fundamentals/utils.mdx @@ -267,3 +267,59 @@ Otherwise, for example when using `tgpu.resolve` on a WGSL template, logs are ig - `console.log` only works in fragment and compute shaders. This is due to a [WebGPU limitation](https://www.w3.org/TR/WGSL/#address-space) that does not allow modifying buffers during the vertex shader stage. - `console.log` currently does not support template literals (but you can use [string substitutions](https://developer.mozilla.org/en-US/docs/Web/API/console#using_string_substitutions), or just pass multiple arguments instead). + +## *for...of...* loops + +TypeGPU supports `for...of...` loops in shader functions. The only constraints are that the loop variable must be declared with `const` and the iterable must be stored in a variable. + +```ts twoslash +import tgpu, { d } from 'typegpu'; + +const processNeighbor = (cell: d.v2i) => {}; + +// ---cut--- +const processNeighbors = (cell: d.v2i) => { + 'use gpu'; + + const offsets = [ + d.vec2i(0, 1), + d.vec2i(0, -1), + d.vec2i(1, 0), + d.vec2i(-1, 0), + ]; + + for (const offset of offsets) { + processNeighbor(cell.add(offset)); + } +}; +``` + +## *tgpu.unroll* + +For code with small, fixed iteration counts, you can use `tgpu.unroll` to unroll loops at compile time. This eliminates the loop's counter and branching overhead, lets per-iteration values be resolved at compile time, and can significantly improve performance. 
+ +### Usage + +Wrap your iterable with `tgpu.unroll()`: + +```ts twoslash +import tgpu, { d } from 'typegpu'; + +const processNeighbor = (cell: d.v2i) => {}; + +// ---cut--- +const processNeighbors = (cell: d.v2i) => { + 'use gpu'; + + for (const dy of tgpu.unroll([-1, 0, 1])) { + for (const dx of tgpu.unroll([-1, 0, 1])) { + processNeighbor(cell.add(d.vec2i(dx, dy))); + } + } +}; +``` + +:::note +- There are no constraints on how large a loop can be for unrolling. We will always try to unroll it, and if we can't, you'll receive an error. +- You cannot use `continue` or `break` inside a loop that you intend to unroll. +::: diff --git a/apps/typegpu-docs/src/examples/algorithms/jump-flood-distance/index.ts b/apps/typegpu-docs/src/examples/algorithms/jump-flood-distance/index.ts index 6b87f9a9f0..018084433c 100644 --- a/apps/typegpu-docs/src/examples/algorithms/jump-flood-distance/index.ts +++ b/apps/typegpu-docs/src/examples/algorithms/jump-flood-distance/index.ts @@ -186,8 +186,8 @@ const jumpFlood = root.createGuardedComputePipeline((x, y) => { let bestInsideDist = 1e20; let bestOutsideDist = 1e20; - for (let dy = -1; dy <= 1; dy++) { - for (let dx = -1; dx <= 1; dx++) { + for (const dx of tgpu.unroll([-1, 0, 1])) { + for (const dy of tgpu.unroll([-1, 0, 1])) { const sample = sampleWithOffset( pingPongLayout.$.readView, d.vec2i(x, y), diff --git a/apps/typegpu-docs/src/examples/algorithms/jump-flood-voronoi/index.ts b/apps/typegpu-docs/src/examples/algorithms/jump-flood-voronoi/index.ts index 0e24d2bb6d..bca20de251 100644 --- a/apps/typegpu-docs/src/examples/algorithms/jump-flood-voronoi/index.ts +++ b/apps/typegpu-docs/src/examples/algorithms/jump-flood-voronoi/index.ts @@ -165,22 +165,23 @@ const jumpFlood = root.createGuardedComputePipeline((x, y) => { let minDist = 1e20; let bestSample = SampleResult({ color: d.vec4f(), coord: d.vec2f(-1) }); - for (let dy = -1; dy <= 1; dy++) { - for (let dx = -1; dx <= 1; dx++) { + for (const dy of tgpu.unroll([-1, 0, 
1])) { + for (const dx of tgpu.unroll([-1, 0, 1])) { const sample = sampleWithOffset( pingPongLayout.$.readView, d.vec2i(x, y), d.vec2i(dx * offset, dy * offset), ); - if (sample.coord.x < 0) { - continue; - } - - const dist = std.distance(d.vec2f(x, y), sample.coord.mul(d.vec2f(size))); - if (dist < minDist) { - minDist = dist; - bestSample = SampleResult(sample); + if (sample.coord.x >= 0) { + const dist = std.distance( + d.vec2f(x, y), + sample.coord.mul(d.vec2f(size)), + ); + if (dist < minDist) { + minDist = dist; + bestSample = SampleResult(sample); + } } } } diff --git a/apps/typegpu-docs/src/examples/image-processing/background-segmentation/shaders.ts b/apps/typegpu-docs/src/examples/image-processing/background-segmentation/shaders.ts index 3569da6019..19c2244828 100644 --- a/apps/typegpu-docs/src/examples/image-processing/background-segmentation/shaders.ts +++ b/apps/typegpu-docs/src/examples/image-processing/background-segmentation/shaders.ts @@ -56,8 +56,8 @@ export const computeFn = tgpu.computeFn({ ).sub(d.vec2i(filterOffset, 0)); // Load a tile of pixels into shared memory - for (let r = 0; r < 4; r++) { - for (let c = 0; c < 4; c++) { + for (const r of tgpu.unroll([0, 1, 2, 3])) { + for (const c of tgpu.unroll([0, 1, 2, 3])) { let loadIndex = baseIndex.add(d.vec2i(c, r)); if (flipAccess.$) { loadIndex = loadIndex.yx; @@ -75,8 +75,8 @@ export const computeFn = tgpu.computeFn({ std.workgroupBarrier(); // Apply the horizontal blur filter and write to the output texture - for (let r = 0; r < 4; r++) { - for (let c = 0; c < 4; c++) { + for (const r of tgpu.unroll([0, 1, 2, 3])) { + for (const c of tgpu.unroll([0, 1, 2, 3])) { let writeIndex = baseIndex.add(d.vec2i(c, r)); if (flipAccess.$) { writeIndex = writeIndex.yx; diff --git a/apps/typegpu-docs/src/examples/image-processing/blur/index.ts b/apps/typegpu-docs/src/examples/image-processing/blur/index.ts index 0ef39c4c05..7f96cf2610 100644 --- 
a/apps/typegpu-docs/src/examples/image-processing/blur/index.ts +++ b/apps/typegpu-docs/src/examples/image-processing/blur/index.ts @@ -73,8 +73,8 @@ const computeFn = tgpu.computeFn({ ).sub(d.vec2i(filterOffset, 0)); // Load a tile of pixels into shared memory - for (let r = 0; r < 4; r++) { - for (let c = 0; c < 4; c++) { + for (const r of tgpu.unroll([0, 1, 2, 3])) { + for (const c of tgpu.unroll([0, 1, 2, 3])) { let loadIndex = baseIndex.add(d.vec2i(c, r)); if (ioLayout.$.flip !== 0) { loadIndex = loadIndex.yx; @@ -92,8 +92,8 @@ const computeFn = tgpu.computeFn({ std.workgroupBarrier(); // Apply the horizontal blur filter and write to the output texture - for (let r = 0; r < 4; r++) { - for (let c = 0; c < 4; c++) { + for (const r of tgpu.unroll([0, 1, 2, 3])) { + for (const c of tgpu.unroll([0, 1, 2, 3])) { let writeIndex = baseIndex.add(d.vec2i(c, r)); if (ioLayout.$.flip !== 0) { writeIndex = writeIndex.yx; diff --git a/apps/typegpu-docs/src/examples/rendering/3d-fish/compute.ts b/apps/typegpu-docs/src/examples/rendering/3d-fish/compute.ts index 2c917155a0..1344aa281b 100644 --- a/apps/typegpu-docs/src/examples/rendering/3d-fish/compute.ts +++ b/apps/typegpu-docs/src/examples/rendering/3d-fish/compute.ts @@ -1,4 +1,4 @@ -import { d, std } from 'typegpu'; +import tgpu, { d, std } from 'typegpu'; import * as p from './params.ts'; import { computeBindGroupLayout as layout } from './schemas.ts'; import { projectPointOnLine } from './tgsl-helpers.ts'; @@ -39,7 +39,7 @@ export const simulate = (fishIndex: number) => { if (cohesionCount > 0) { cohesion = cohesion / cohesionCount - fishData.position; } - for (let i = 0; i < 3; i += 1) { + for (const i of tgpu.unroll([0, 1, 2])) { const repulsion = d.vec3f(); repulsion[i] = 1; diff --git a/apps/typegpu-docs/src/examples/rendering/clouds/utils.ts b/apps/typegpu-docs/src/examples/rendering/clouds/utils.ts index 71a1bec9b5..d5d09e63e3 100644 --- a/apps/typegpu-docs/src/examples/rendering/clouds/utils.ts +++ 
b/apps/typegpu-docs/src/examples/rendering/clouds/utils.ts @@ -72,12 +72,13 @@ export const raymarch = tgpu.fn([d.vec3f, d.vec3f, d.vec3f], d.vec4f)( }, ); +const iterations = Array.from({ length: FBM_OCTAVES }, (_, i) => i); const fbm = tgpu.fn([d.vec3f], d.f32)((pos) => { let sum = d.f32(); let amp = d.f32(CLOUD_AMPLITUDE); let freq = d.f32(CLOUD_FREQUENCY); - for (let i = 0; i < FBM_OCTAVES; i++) { + for (const _i of tgpu.unroll(iterations)) { sum += noise3d(std.mul(pos, freq)) * amp; amp *= FBM_PERSISTENCE; freq *= FBM_LACUNARITY; diff --git a/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts b/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts index f81d3fbf6e..07c4a97e89 100644 --- a/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts +++ b/apps/typegpu-docs/src/examples/rendering/cubemap-reflection/icosphere.ts @@ -171,7 +171,7 @@ export class IcosphereGenerator { ]); const baseIndexNext = triangleIndex * 12; - for (let i = d.u32(0); i < 12; i++) { + for (const i of tgpu.unroll([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])) { const reprojectedVertex = newVertices[i]; const triBase = i - (i % 3); diff --git a/apps/typegpu-docs/src/examples/rendering/jelly-slider/taa.ts b/apps/typegpu-docs/src/examples/rendering/jelly-slider/taa.ts index 6956e2f700..2ce69e7096 100644 --- a/apps/typegpu-docs/src/examples/rendering/jelly-slider/taa.ts +++ b/apps/typegpu-docs/src/examples/rendering/jelly-slider/taa.ts @@ -25,8 +25,8 @@ export const taaResolveFn = tgpu.computeFn({ const dimensions = std.textureDimensions(taaResolveLayout.$.currentTexture); - for (let x = -1; x <= 1; x++) { - for (let y = -1; y <= 1; y++) { + for (const x of tgpu.unroll([-1, 0, 1])) { + for (const y of tgpu.unroll([-1, 0, 1])) { const sampleCoord = d.vec2i(gid.xy).add(d.vec2i(x, y)); const clampedCoord = std.clamp( sampleCoord, diff --git a/apps/typegpu-docs/src/examples/rendering/jelly-switch/taa.ts 
b/apps/typegpu-docs/src/examples/rendering/jelly-switch/taa.ts index 3748b5cd1e..2474ec5265 100644 --- a/apps/typegpu-docs/src/examples/rendering/jelly-switch/taa.ts +++ b/apps/typegpu-docs/src/examples/rendering/jelly-switch/taa.ts @@ -25,8 +25,8 @@ export const taaResolveFn = tgpu.computeFn({ const dimensions = std.textureDimensions(taaResolveLayout.$.currentTexture); - for (let x = -1; x <= 1; x++) { - for (let y = -1; y <= 1; y++) { + for (const x of tgpu.unroll([-1, 0, 1])) { + for (const y of tgpu.unroll([-1, 0, 1])) { const sampleCoord = d.vec2i(gid.xy).add(d.vec2i(x, y)); const clampedCoord = std.clamp( sampleCoord, diff --git a/apps/typegpu-docs/src/examples/simple/liquid-glass/index.ts b/apps/typegpu-docs/src/examples/simple/liquid-glass/index.ts index f15d600ce1..29900fc43e 100644 --- a/apps/typegpu-docs/src/examples/simple/liquid-glass/index.ts +++ b/apps/typegpu-docs/src/examples/simple/liquid-glass/index.ts @@ -95,7 +95,7 @@ const sampleWithChromaticAberration = ( ) => { 'use gpu'; const samples = d.arrayOf(d.vec3f, 3)(); - for (let i = 0; i < 3; i++) { + for (const i of tgpu.unroll([0, 1, 2])) { const channelOffset = dir.mul((d.f32(i) - 1.0) * offset); samples[i] = std.textureSampleBias(tex, sampler, uv.sub(channelOffset), blur).rgb; diff --git a/apps/typegpu-docs/src/examples/simple/ripple-cube/pbr.ts b/apps/typegpu-docs/src/examples/simple/ripple-cube/pbr.ts index 31867f24b4..b4408fc9af 100644 --- a/apps/typegpu-docs/src/examples/simple/ripple-cube/pbr.ts +++ b/apps/typegpu-docs/src/examples/simple/ripple-cube/pbr.ts @@ -86,13 +86,14 @@ export const evaluateLight = ( .mul(ndotl); }; +const lightCountIterations = Array.from({ length: LIGHT_COUNT }, (_, i) => i); export const shade = (p: d.v3f, n: d.v3f, v: d.v3f): d.v3f => { 'use gpu'; const material = materialAccess.$; const f0 = std.mix(d.vec3f(0.04), material.albedo, material.metallic); let lo = d.vec3f(0); - for (let i = 0; i < LIGHT_COUNT; i++) { + for (const i of 
tgpu.unroll(lightCountIterations)) { lo = lo.add(evaluateLight(p, n, v, lightsAccess.$[i], material, f0)); } diff --git a/apps/typegpu-docs/src/examples/simple/ripple-cube/post-processing.ts b/apps/typegpu-docs/src/examples/simple/ripple-cube/post-processing.ts index 6b5432372f..a3f8f84fab 100644 --- a/apps/typegpu-docs/src/examples/simple/ripple-cube/post-processing.ts +++ b/apps/typegpu-docs/src/examples/simple/ripple-cube/post-processing.ts @@ -81,8 +81,8 @@ export function createPostProcessingPipelines( let minColor = d.vec3f(9999); let maxColor = d.vec3f(-9999); - for (let ox = -1; ox <= 1; ox++) { - for (let oy = -1; oy <= 1; oy++) { + for (const ox of tgpu.unroll([-1, 0, 1])) { + for (const oy of tgpu.unroll([-1, 0, 1])) { const sampleCoord = coord.add(d.vec2i(ox, oy)); const clampedCoord = std.clamp( sampleCoord, diff --git a/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts b/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts index bc0ea8a51f..8cbac15c5a 100644 --- a/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts +++ b/apps/typegpu-docs/src/examples/simulation/fluid-double-buffering/index.ts @@ -160,21 +160,19 @@ const computeVelocity = (x: number, y: number): d.v2f => { ]; let dirChoiceCount = 1; - for (const offset of neighborOffsets) { + for (const offset of tgpu.unroll(neighborOffsets)) { const neighborDensity = getCell(x + offset.x, y + offset.y); const cost = neighborDensity.z + d.f32(offset.y) * gravityCost; - if (!isValidFlowOut(x + offset.x, y + offset.y)) { - continue; - } - - if (cost === leastCost) { - dirChoices[dirChoiceCount] = d.vec2f(d.f32(offset.x), d.f32(offset.y)); - dirChoiceCount++; - } else if (cost < leastCost) { - leastCost = cost; - dirChoices[0] = d.vec2f(d.f32(offset.x), d.f32(offset.y)); - dirChoiceCount = 1; + if (isValidFlowOut(x + offset.x, y + offset.y)) { + if (cost === leastCost) { + dirChoices[dirChoiceCount] = d.vec2f(d.f32(offset.x), 
d.f32(offset.y)); + dirChoiceCount++; + } else if (cost < leastCost) { + leastCost = cost; + dirChoices[0] = d.vec2f(d.f32(offset.x), d.f32(offset.y)); + dirChoiceCount = 1; + } } } diff --git a/apps/typegpu-docs/src/examples/simulation/slime-mold-3d/index.ts b/apps/typegpu-docs/src/examples/simulation/slime-mold-3d/index.ts index 1c08a09028..201c05c662 100644 --- a/apps/typegpu-docs/src/examples/simulation/slime-mold-3d/index.ts +++ b/apps/typegpu-docs/src/examples/simulation/slime-mold-3d/index.ts @@ -176,6 +176,8 @@ const getPerpendicular = (dir: d.v3f) => { return std.normalize(std.cross(dir, axis)); }; +const numSamples = 8; +const samplesIterations = Array.from({ length: numSamples }, (_, i) => i); const sense3D = (pos: d.v3f, direction: d.v3f) => { 'use gpu'; const dims = std.textureDimensions(computeLayout.$.oldState); @@ -187,8 +189,7 @@ const sense3D = (pos: d.v3f, direction: d.v3f) => { const perp1 = getPerpendicular(direction); const perp2 = std.cross(direction, perp1); - const numSamples = 8; - for (let i = 0; i < numSamples; i++) { + for (const i of tgpu.unroll(samplesIterations)) { const theta = (i / numSamples) * 2 * Math.PI; const coneOffset = perp1.mul(std.cos(theta)).add(perp2.mul(std.sin(theta))); diff --git a/apps/typegpu-docs/src/examples/simulation/slime-mold/index.ts b/apps/typegpu-docs/src/examples/simulation/slime-mold/index.ts index fda0e7c29e..019effa71e 100644 --- a/apps/typegpu-docs/src/examples/simulation/slime-mold/index.ts +++ b/apps/typegpu-docs/src/examples/simulation/slime-mold/index.ts @@ -168,8 +168,8 @@ const blur = tgpu.computeFn({ let count = d.f32(); // 3x3 blur kernel - for (let offsetY = -1; offsetY <= 1; offsetY++) { - for (let offsetX = -1; offsetX <= 1; offsetX++) { + for (const offsetY of tgpu.unroll([-1, 0, 1])) { + for (const offsetX of tgpu.unroll([-1, 0, 1])) { const samplePos = d.vec2i(gid.xy).add(d.vec2i(offsetX, offsetY)); const dimsi = d.vec2i(dims); diff --git 
a/apps/typegpu-docs/src/examples/simulation/stable-fluid/simulation.ts b/apps/typegpu-docs/src/examples/simulation/stable-fluid/simulation.ts index 21bb70bdbf..d424c1adb1 100644 --- a/apps/typegpu-docs/src/examples/simulation/stable-fluid/simulation.ts +++ b/apps/typegpu-docs/src/examples/simulation/stable-fluid/simulation.ts @@ -9,7 +9,7 @@ const getNeighbors = tgpu.fn([d.vec2i, d.vec2i], d.arrayOf(d.vec2i, 4))( d.vec2i(1, 0), d.vec2i(0, 1), ]; - for (let i = 0; i < 4; i++) { + for (const i of tgpu.unroll([0, 1, 2, 3])) { adjacentOffsets[i] = std.clamp( std.add(coords, adjacentOffsets[i]), d.vec2i(), diff --git a/packages/typegpu/src/core/unroll/tgpuUnroll.ts b/packages/typegpu/src/core/unroll/tgpuUnroll.ts new file mode 100644 index 0000000000..cfb31fd563 --- /dev/null +++ b/packages/typegpu/src/core/unroll/tgpuUnroll.ts @@ -0,0 +1,54 @@ +import { stitch } from '../resolve/stitch.ts'; +import { + $gpuCallable, + $internal, + $resolve, +} from '../../../src/shared/symbols.ts'; +import { setName } from '../../../src/shared/meta.ts'; +import type { DualFn } from '../../../src/types.ts'; +import type { AnyData } from '../../../src/data/dataTypes.ts'; +import { + type ResolvedSnippet, + snip, + type Snippet, +} from '../../../src/data/snippet.ts'; +import type { ResolutionCtx, SelfResolvable } from '../../../src/types.ts'; + +/** + * The result of calling `tgpu.unroll(...)`. The code responsible for + * generating shader code can check if the value of a snippet is + * an instance of `UnrollableIterable`, and act accordingly. + */ +export class UnrollableIterable implements SelfResolvable { + readonly [$internal] = true; + + constructor(public readonly snippet: Snippet) {} + + [$resolve](_ctx: ResolutionCtx): ResolvedSnippet { + return snip( + stitch`${this.snippet}`, + this.snippet.dataType as AnyData, + this.snippet.origin, + ); + } +} + +/** + * Marks an iterable to be unrolled by the wgslGenerator. 
+ */ +export const unroll = (() => { + const impl = (>(value: T) => value) as unknown as + & DualFn<(>(value: T) => T)> + & { [$internal]: true }; + + setName(impl, 'unroll'); + impl.toString = () => 'unroll'; + impl[$internal] = true; + impl[$gpuCallable] = { + call(_ctx, [value]) { + return snip(new UnrollableIterable(value), value.dataType, value.origin); + }, + }; + + return impl; +})(); diff --git a/packages/typegpu/src/tgpu.ts b/packages/typegpu/src/tgpu.ts index 77267ee113..346be531ae 100644 --- a/packages/typegpu/src/tgpu.ts +++ b/packages/typegpu/src/tgpu.ts @@ -14,5 +14,6 @@ export { bindGroupLayout } from './tgpuBindGroupLayout.ts'; export { computeFn } from './core/function/tgpuComputeFn.ts'; export { fragmentFn } from './core/function/tgpuFragmentFn.ts'; export { vertexFn } from './core/function/tgpuVertexFn.ts'; +export { unroll } from './core/unroll/tgpuUnroll.ts'; export * as '~unstable' from './tgpuUnstable.ts'; diff --git a/packages/typegpu/src/tgsl/accessIndex.ts b/packages/typegpu/src/tgsl/accessIndex.ts index 16016a3e0c..30b6e0ab10 100644 --- a/packages/typegpu/src/tgsl/accessIndex.ts +++ b/packages/typegpu/src/tgsl/accessIndex.ts @@ -18,8 +18,10 @@ import { isPtr, isVec, isWgslArray, + isWgslStruct, } from '../data/wgslTypes.ts'; import { isKnownAtComptime } from '../types.ts'; +import { accessProp } from './accessProp.ts'; import { coerceToSnippet } from './generationHelpers.ts'; const indexableTypeToResult = { @@ -129,5 +131,12 @@ export function accessIndex( ); } + if ( + isWgslStruct(target.dataType) && isKnownAtComptime(index) && + typeof index.value === 'string' + ) { + return accessProp(target, index.value); + } + return undefined; } diff --git a/packages/typegpu/src/tgsl/forOfUtils.ts b/packages/typegpu/src/tgsl/forOfUtils.ts new file mode 100644 index 0000000000..36578c19ca --- /dev/null +++ b/packages/typegpu/src/tgsl/forOfUtils.ts @@ -0,0 +1,113 @@ +import { UnknownData } from '../data/dataTypes.ts'; +import { isEphemeralSnippet, 
snip, type Snippet } from '../data/snippet.ts'; +import { stitch } from '../core/resolve/stitch.ts'; +import * as wgsl from '../data/wgslTypes.ts'; +import { u32 } from '../data/numeric.ts'; +import { invariant, WgslTypeError } from '../errors.ts'; +import { arrayLength } from '../std/array.ts'; +import { accessIndex } from './accessIndex.ts'; +import { createPtrFromOrigin, implicitFrom } from '../data/ptr.ts'; +import { $gpuCallable } from '../shared/symbols.ts'; +import { + ArrayExpression, + concretize, + type GenerationCtx, +} from './generationHelpers.ts'; + +export function getLoopVarKind(elementSnippet: Snippet) { + // If it's ephemeral, it's a value that cannot change. If it's a reference, we take + // an implicit pointer to it + return elementSnippet.origin === 'constant-tgpu-const-ref' ? 'const' : 'let'; +} + +export function getElementSnippet( + iterableSnippet: Snippet, + index: Snippet, +) { + const elementSnippet = accessIndex( + iterableSnippet, + index, + ); + + if (!elementSnippet) { + throw new WgslTypeError( + '`for ... 
of ...` loops only support array or vector iterables', + ); + } + + return elementSnippet; +} + +export function getElementType( + elementSnippet: Snippet, + iterableSnippet: Snippet, +) { + let elementType = elementSnippet.dataType; + if (elementType === UnknownData) { + throw new WgslTypeError( + stitch`The elements in iterable ${iterableSnippet} are of unknown type`, + ); + } + + if ( + isEphemeralSnippet(elementSnippet) || + elementSnippet.origin === 'constant-tgpu-const-ref' || + elementSnippet.origin === 'runtime-tgpu-const-ref' + ) { + return elementType; + } + + if (!wgsl.isPtr(elementType)) { + const ptrType = createPtrFromOrigin( + elementSnippet.origin, + concretize(elementType as wgsl.AnyWgslData) as wgsl.StorableData, + ); + invariant( + ptrType !== undefined, + `Creating pointer type from origin ${elementSnippet.origin}`, + ); + elementType = ptrType; + } + + return implicitFrom(elementType as wgsl.Ptr); +} + +export function getElementCountSnippet( + ctx: GenerationCtx, + iterableSnippet: Snippet, + unroll: boolean = false, +) { + const { value, dataType } = iterableSnippet; + + if (wgsl.isWgslArray(dataType)) { + return dataType.elementCount > 0 + ? snip( + dataType.elementCount, + u32, + 'constant', + ) + : arrayLength[$gpuCallable].call(ctx, [iterableSnippet]); + } + + if (wgsl.isVec(dataType)) { + return snip( + dataType.componentCount, + u32, + 'constant', + ); + } + + if (unroll) { + if (Array.isArray(value)) { + return snip(value.length, u32, 'constant'); + } + + if (value instanceof ArrayExpression) { + return snip(value.elements.length, u32, 'constant'); + } + } + + throw new WgslTypeError( + '`for ... 
of ...` loops only support array or vector iterables', + ); +} diff --git a/packages/typegpu/src/tgsl/wgslGenerator.ts b/packages/typegpu/src/tgsl/wgslGenerator.ts index b54ea46ec2..b7fd93cbfb 100644 --- a/packages/typegpu/src/tgsl/wgslGenerator.ts +++ b/packages/typegpu/src/tgsl/wgslGenerator.ts @@ -47,12 +47,13 @@ import type { ShaderGenerator } from './shaderGenerator.ts'; import { createPtrFromOrigin, implicitFrom, ptrFn } from '../data/ptr.ts'; import { RefOperator } from '../data/ref.ts'; import { constant } from '../core/constant/tgpuConstant.ts'; +import { UnrollableIterable } from '../core/unroll/tgpuUnroll.ts'; import { isGenericFn } from '../core/function/tgpuFn.ts'; import type { AnyFn } from '../core/function/fnTypes.ts'; -import { arrayLength } from '../std/array.ts'; import { AutoStruct } from '../data/autoStruct.ts'; import { mathToStd } from './math.ts'; import type { ExternalMap } from '../core/resolve/externals.ts'; +import * as forOfUtils from './forOfUtils.ts'; const { NodeTypeCatalog: NODE } = tinyest; @@ -186,6 +187,8 @@ const binaryOpCodeToCodegen = { class WgslGenerator implements ShaderGenerator { #ctx: GenerationCtx | undefined = undefined; + // used to detect `continue` and `break` nodes in loop body + #unrolling = false; public initGenerator(ctx: GenerationCtx) { this.#ctx = ctx; @@ -1154,6 +1157,8 @@ ${this.ctx.pre}else ${alternate}`; if (statement[0] === NODE.for) { const [_, init, condition, update, body] = statement; + const prevUnrollingFlag = this.#unrolling; + this.#unrolling = false; try { this.ctx.pushBlockScope(); @@ -1170,114 +1175,128 @@ ${this.ctx.pre}else ${alternate}`; const bodyStr = this.block(blockifySingleStatement(body)); return stitch`${this.ctx.pre}for (${initStr}; ${conditionExpr}; ${updateStr}) ${bodyStr}`; } finally { + this.#unrolling = prevUnrollingFlag; this.ctx.popBlockScope(); } } if (statement[0] === NODE.while) { - const [_, condition, body] = statement; - const condSnippet = 
this.typedExpression(condition, bool); - const conditionStr = this.ctx.resolve(condSnippet.value).value; + const prevUnrollingFlag = this.#unrolling; + this.#unrolling = false; + try { + const [_, condition, body] = statement; + const condSnippet = this.typedExpression(condition, bool); + const conditionStr = this.ctx.resolve(condSnippet.value).value; - const bodyStr = this.block(blockifySingleStatement(body)); - return `${this.ctx.pre}while (${conditionStr}) ${bodyStr}`; + const bodyStr = this.block(blockifySingleStatement(body)); + return `${this.ctx.pre}while (${conditionStr}) ${bodyStr}`; + } finally { + this.#unrolling = prevUnrollingFlag; + } } if (statement[0] === NODE.forOf) { const [_, loopVar, iterable, body] = statement; - const iterableSnippet = this.expression(iterable); - if (isEphemeralSnippet(iterableSnippet)) { - throw new Error( - '`for ... of ...` loops only support iterables stored in variables', + if (loopVar[0] !== NODE.const) { + throw new WgslTypeError( + 'Only `for (const ... of ... )` loops are supported', ); } - try { - this.ctx.pushBlockScope(); - const index = this.ctx.makeNameValid('i'); + let ctxIndent = false; + const prevUnrollingFlag = this.#unrolling; - const elementSnippet = accessIndex( + try { + this.ctx.pushBlockScope(); + const iterableExpr = this.expression(iterable); + const shouldUnroll = iterableExpr.value instanceof UnrollableIterable; + const iterableSnippet = shouldUnroll + ? iterableExpr.value.snippet + : iterableExpr; + const elementCountSnippet = forOfUtils.getElementCountSnippet( + this.ctx, iterableSnippet, - snip(index, u32, 'runtime'), + shouldUnroll, ); - if (!elementSnippet) { - throw new WgslTypeError( - '`for ... 
of ...` loops only support array or vector iterables', - ); - } + const originalLoopVarName = loopVar[1]; + const blockified = blockifySingleStatement(body); - const iterableDataType = iterableSnippet.dataType; - let elementCountSnippet: Snippet; - let elementType = elementSnippet.dataType; + if (shouldUnroll) { + if (!isKnownAtComptime(elementCountSnippet)) { + throw new Error( + 'Cannot unroll loop. Length of iterable is unknown at comptime.', + ); + } - if (elementType === UnknownData) { - throw new WgslTypeError( - stitch`The elements in iterable ${iterableSnippet} are of unknown type`, - ); - } + this.#unrolling = true; - if (wgsl.isWgslArray(iterableDataType)) { - elementCountSnippet = iterableDataType.elementCount > 0 - ? snip( - `${iterableDataType.elementCount}`, - u32, - 'constant', - ) - : arrayLength[$gpuCallable].call(this.ctx, [iterableSnippet]); - } else if (wgsl.isVec(iterableDataType)) { - elementCountSnippet = snip( - `${Number(iterableDataType.type.match(/\d/))}`, - u32, - 'constant', - ); - } else { - throw new WgslTypeError( - '`for ... of ...` loops only support array or vector iterables', - ); + const length = elementCountSnippet.value as number; + if (length === 0) { + return ''; + } + + const { value } = iterableSnippet; + + const elements = value instanceof ArrayExpression + ? value.elements + : Array.from( + { length }, + (_, i) => + forOfUtils.getElementSnippet( + iterableSnippet, + snip(i, u32, 'constant'), + ), + ); + + if ( + isEphemeralSnippet(elements[0] as Snippet) && + !wgsl.isNaturallyEphemeral(elements[0]?.dataType) + ) { + throw new WgslTypeError( + 'Cannot unroll loop. 
The elements of iterable are emphemeral but not naturally ephemeral.', + ); + } + + const blocks = elements + .map((e, i) => + `${this.ctx.pre}// unrolled iteration #${i}\n${this.ctx.pre}${ + this.block(blockified, { [originalLoopVarName]: e }) + }` + ); + + return blocks.join('\n'); } - if (loopVar[0] !== NODE.const) { - throw new WgslTypeError( - 'Only `for (const ... of ... )` loops are supported', + if (isEphemeralSnippet(iterableSnippet)) { + throw new Error( + `\`for ... of ...\` loops only support iterables stored in variables. + ----- + You can wrap iterable with \`tgpu.unroll(...)\`. If iterable is known at comptime, the loop will be unrolled. + -----`, ); } - // If it's ephemeral, it's a value that cannot change. If it's a reference, we take - // an implicit pointer to it - let loopVarKind = 'let'; - if (!isEphemeralSnippet(elementSnippet)) { - if (elementSnippet.origin === 'constant-tgpu-const-ref') { - loopVarKind = 'const'; - } else if (elementSnippet.origin === 'runtime-tgpu-const-ref') { - loopVarKind = 'let'; - } else { - loopVarKind = 'let'; - if (!wgsl.isPtr(elementType)) { - const ptrType = createPtrFromOrigin( - elementSnippet.origin, - concretize( - elementType as wgsl.AnyWgslData, - ) as wgsl.StorableData, - ); - invariant( - ptrType !== undefined, - `Creating pointer type from origin ${elementSnippet.origin}`, - ); - elementType = ptrType; - } + this.#unrolling = false; - elementType = implicitFrom(elementType as wgsl.Ptr); - } - } + const index = this.ctx.makeNameValid('i'); + const elementSnippet = forOfUtils.getElementSnippet( + iterableSnippet, + snip(index, u32, 'runtime'), + ); + const loopVarName = this.ctx.makeNameValid(originalLoopVarName); + const loopVarKind = forOfUtils.getLoopVarKind(elementSnippet); + const elementType = forOfUtils.getElementType( + elementSnippet, + iterableSnippet, + ); + + const forHeaderStr = + stitch`${this.ctx.pre}for (var ${index} = 0u; ${index} < ${elementCountSnippet}; ${index}++) {`; - const forStr = - 
stitch`${this.ctx.pre}for (var ${index} = 0u; ${index} < ${ - tryConvertSnippet(this.ctx, elementCountSnippet, u32, false) - }; ${index}++) {`; this.ctx.indent(); + ctxIndent = true; - const loopVarName = this.ctx.makeNameValid(loopVar[1]); const loopVarDeclStr = stitch`${this.ctx.pre}${loopVarKind} ${loopVarName} = ${ tryConvertSnippet( @@ -1289,24 +1308,43 @@ ${this.ctx.pre}else ${alternate}`; };`; const bodyStr = `${this.ctx.pre}${ - this.block(blockifySingleStatement(body), { - [loopVar[1]]: snip(loopVarName, elementType, elementSnippet.origin), + this.block(blockified, { + [originalLoopVarName]: snip( + loopVarName, + elementType, + elementSnippet.origin, + ), }) }`; this.ctx.dedent(); + ctxIndent = false; - return stitch`${forStr}\n${loopVarDeclStr}\n${bodyStr}\n${this.ctx.pre}}`; + return stitch`${forHeaderStr}\n${loopVarDeclStr}\n${bodyStr}\n${this.ctx.pre}}`; } finally { + if (ctxIndent) { + this.ctx.dedent(); + } + this.#unrolling = prevUnrollingFlag; this.ctx.popBlockScope(); } } if (statement[0] === NODE.continue) { + if (this.#unrolling) { + throw new WgslTypeError( + 'Cannot unroll loop containing `continue`', + ); + } return `${this.ctx.pre}continue;`; } if (statement[0] === NODE.break) { + if (this.#unrolling) { + throw new WgslTypeError( + 'Cannot unroll loop containing `break`', + ); + } return `${this.ctx.pre}break;`; } diff --git a/packages/typegpu/tests/examples/individual/3d-fish.test.ts b/packages/typegpu/tests/examples/individual/3d-fish.test.ts index f5d60e2251..03d8de4ed8 100644 --- a/packages/typegpu/tests/examples/individual/3d-fish.test.ts +++ b/packages/typegpu/tests/examples/individual/3d-fish.test.ts @@ -160,11 +160,44 @@ describe('3d fish example', () => { if ((cohesionCount > 0i)) { cohesion = ((cohesion / f32(cohesionCount)) - (*fishData).position); } - for (var i = 0; (i < 3i); i += 1i) { + // unrolled iteration #0 + { var repulsion = vec3f(); - repulsion[i] = 1f; - let axisAquariumSize = (vec3f(10, 4, 10)[i] / 2f); - let 
axisPosition = (*fishData).position[i]; + repulsion[0i] = 1f; + const axisAquariumSize = 5f; + let axisPosition = (*fishData).position[0i]; + const distance_1 = 0.1; + if ((axisPosition > (axisAquariumSize - distance_1))) { + let str2 = (axisPosition - (axisAquariumSize - distance_1)); + wallRepulsion = (wallRepulsion - (repulsion * str2)); + } + if ((axisPosition < (-(axisAquariumSize) + distance_1))) { + let str2 = ((-(axisAquariumSize) + distance_1) - axisPosition); + wallRepulsion = (wallRepulsion + (repulsion * str2)); + } + } + // unrolled iteration #1 + { + var repulsion = vec3f(); + repulsion[1i] = 1f; + const axisAquariumSize = 2f; + let axisPosition = (*fishData).position[1i]; + const distance_1 = 0.1; + if ((axisPosition > (axisAquariumSize - distance_1))) { + let str2 = (axisPosition - (axisAquariumSize - distance_1)); + wallRepulsion = (wallRepulsion - (repulsion * str2)); + } + if ((axisPosition < (-(axisAquariumSize) + distance_1))) { + let str2 = ((-(axisAquariumSize) + distance_1) - axisPosition); + wallRepulsion = (wallRepulsion + (repulsion * str2)); + } + } + // unrolled iteration #2 + { + var repulsion = vec3f(); + repulsion[2i] = 1f; + const axisAquariumSize = 5f; + let axisPosition = (*fishData).position[2i]; const distance_1 = 0.1; if ((axisPosition > (axisAquariumSize - distance_1))) { let str2 = (axisPosition - (axisAquariumSize - distance_1)); diff --git a/packages/typegpu/tests/examples/individual/blur.test.ts b/packages/typegpu/tests/examples/individual/blur.test.ts index 137c90c309..f34a83cf36 100644 --- a/packages/typegpu/tests/examples/individual/blur.test.ts +++ b/packages/typegpu/tests/examples/individual/blur.test.ts @@ -52,28 +52,410 @@ describe('blur example', () => { let filterOffset = i32((f32(((*settings2).filterDim - 1i)) / 2f)); var dims = vec2i(textureDimensions(inTexture)); var baseIndex = (vec2i(((_arg_0.wid.xy * vec2u((*settings2).blockDim, 4u)) + (_arg_0.lid.xy * vec2u(4, 1)))) - vec2i(filterOffset, 0i)); - for (var r 
= 0; (r < 4i); r++) { - for (var c = 0; (c < 4i); c++) { - var loadIndex = (baseIndex + vec2i(c, r)); + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var loadIndex = (baseIndex + vec2i()); if ((flip != 0u)) { loadIndex = loadIndex.yx; } - tileData[r][((_arg_0.lid.x * 4u) + u32(c))] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + tileData[0i][((_arg_0.lid.x * 4u) + 0u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #1 + { + var loadIndex = (baseIndex + vec2i(1, 0)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[0i][((_arg_0.lid.x * 4u) + 1u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #2 + { + var loadIndex = (baseIndex + vec2i(2, 0)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[0i][((_arg_0.lid.x * 4u) + 2u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #3 + { + var loadIndex = (baseIndex + vec2i(3, 0)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[0i][((_arg_0.lid.x * 4u) + 3u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var loadIndex = (baseIndex + vec2i(0, 1)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[1i][((_arg_0.lid.x * 4u) + 0u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #1 + { + var loadIndex = (baseIndex + vec2i(1)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[1i][((_arg_0.lid.x * 4u) + 1u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #2 + { + var 
loadIndex = (baseIndex + vec2i(2, 1)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[1i][((_arg_0.lid.x * 4u) + 2u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #3 + { + var loadIndex = (baseIndex + vec2i(3, 1)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[1i][((_arg_0.lid.x * 4u) + 3u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + } + // unrolled iteration #2 + { + // unrolled iteration #0 + { + var loadIndex = (baseIndex + vec2i(0, 2)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[2i][((_arg_0.lid.x * 4u) + 0u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #1 + { + var loadIndex = (baseIndex + vec2i(1, 2)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[2i][((_arg_0.lid.x * 4u) + 1u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #2 + { + var loadIndex = (baseIndex + vec2i(2)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[2i][((_arg_0.lid.x * 4u) + 2u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #3 + { + var loadIndex = (baseIndex + vec2i(3, 2)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[2i][((_arg_0.lid.x * 4u) + 3u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + } + // unrolled iteration #3 + { + // unrolled iteration #0 + { + var loadIndex = (baseIndex + vec2i(0, 3)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[3i][((_arg_0.lid.x * 4u) + 0u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #1 + { 
+ var loadIndex = (baseIndex + vec2i(1, 3)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[3i][((_arg_0.lid.x * 4u) + 1u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #2 + { + var loadIndex = (baseIndex + vec2i(2, 3)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[3i][((_arg_0.lid.x * 4u) + 2u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; + } + // unrolled iteration #3 + { + var loadIndex = (baseIndex + vec2i(3)); + if ((flip != 0u)) { + loadIndex = loadIndex.yx; + } + tileData[3i][((_arg_0.lid.x * 4u) + 3u)] = textureSampleLevel(inTexture, sampler_1, ((vec2f(loadIndex) + vec2f(0.5)) / vec2f(dims)), 0).rgb; } } workgroupBarrier(); - for (var r = 0; (r < 4i); r++) { - for (var c = 0; (c < 4i); c++) { - var writeIndex = (baseIndex + vec2i(c, r)); + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var writeIndex = (baseIndex + vec2i()); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 0i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[0i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #1 + { + var writeIndex = (baseIndex + vec2i(1, 0)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 1i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[0i][i] * (1f / f32((*settings2).filterDim)))); + } + 
textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #2 + { + var writeIndex = (baseIndex + vec2i(2, 0)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 2i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[0i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #3 + { + var writeIndex = (baseIndex + vec2i(3, 0)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 3i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[0i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var writeIndex = (baseIndex + vec2i(0, 1)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 0i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[1i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #1 + { + var writeIndex = (baseIndex + vec2i(1)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 1i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + 
var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[1i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #2 + { + var writeIndex = (baseIndex + vec2i(2, 1)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 2i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[1i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #3 + { + var writeIndex = (baseIndex + vec2i(3, 1)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 3i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[1i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + } + // unrolled iteration #2 + { + // unrolled iteration #0 + { + var writeIndex = (baseIndex + vec2i(0, 2)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 0i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[2i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #1 + { + var writeIndex = (baseIndex + vec2i(1, 2)); + if ((flip 
!= 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 1i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[2i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #2 + { + var writeIndex = (baseIndex + vec2i(2)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 2i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[2i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #3 + { + var writeIndex = (baseIndex + vec2i(3, 2)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 3i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[2i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + } + // unrolled iteration #3 + { + // unrolled iteration #0 + { + var writeIndex = (baseIndex + vec2i(0, 3)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 0i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[3i][i] * 
(1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #1 + { + var writeIndex = (baseIndex + vec2i(1, 3)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 1i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[3i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #2 + { + var writeIndex = (baseIndex + vec2i(2, 3)); + if ((flip != 0u)) { + writeIndex = writeIndex.yx; + } + let center = (i32((4u * _arg_0.lid.x)) + 2i); + if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { + var acc = vec3f(); + for (var f = 0; (f < (*settings2).filterDim); f++) { + let i = ((center + f) - filterOffset); + acc = (acc + (tileData[3i][i] * (1f / f32((*settings2).filterDim)))); + } + textureStore(outTexture, writeIndex, vec4f(acc, 1f)); + } + } + // unrolled iteration #3 + { + var writeIndex = (baseIndex + vec2i(3)); if ((flip != 0u)) { writeIndex = writeIndex.yx; } - let center = (i32((4u * _arg_0.lid.x)) + c); + let center = (i32((4u * _arg_0.lid.x)) + 3i); if ((((center >= filterOffset) && (center < (128i - filterOffset))) && all((writeIndex < dims)))) { var acc = vec3f(); for (var f = 0; (f < (*settings2).filterDim); f++) { let i = ((center + f) - filterOffset); - acc = (acc + (tileData[r][i] * (1f / f32((*settings2).filterDim)))); + acc = (acc + (tileData[3i][i] * (1f / f32((*settings2).filterDim)))); } textureStore(outTexture, writeIndex, vec4f(acc, 1f)); } diff --git a/packages/typegpu/tests/examples/individual/clouds.test.ts b/packages/typegpu/tests/examples/individual/clouds.test.ts index df70cf0435..c358a845c9 100644 --- 
a/packages/typegpu/tests/examples/individual/clouds.test.ts +++ b/packages/typegpu/tests/examples/individual/clouds.test.ts @@ -86,7 +86,20 @@ describe('clouds example', () => { var sum = 0f; var amp = 1f; var freq = 1.399999976158142f; - for (var i = 0; (i < 3i); i++) { + // unrolled iteration #0 + { + sum += (noise3d((pos * freq)) * amp); + amp *= 0.5f; + freq *= 2f; + } + // unrolled iteration #1 + { + sum += (noise3d((pos * freq)) * amp); + amp *= 0.5f; + freq *= 2f; + } + // unrolled iteration #2 + { sum += (noise3d((pos * freq)) * amp); amp *= 0.5f; freq *= 2f; diff --git a/packages/typegpu/tests/examples/individual/cubemap-reflection.test.ts b/packages/typegpu/tests/examples/individual/cubemap-reflection.test.ts index 83dbbb3084..641e6b7cca 100644 --- a/packages/typegpu/tests/examples/individual/cubemap-reflection.test.ts +++ b/packages/typegpu/tests/examples/individual/cubemap-reflection.test.ts @@ -82,14 +82,158 @@ describe('cubemap reflection example', () => { var v31 = vec4f(normalize(calculateMidpoint(v3, v1).xyz), 1f); var newVertices = array(v1, v12, v31, v2, v23, v12, v3, v31, v23, v12, v23, v31); let baseIndexNext = (triangleIndex * 12u); - for (var i = 0u; (i < 12u); i++) { - let reprojectedVertex = (&newVertices[i]); - let triBase = (i - (i % 3u)); + // unrolled iteration #0 + { + let reprojectedVertex = (&newVertices[0i]); + let triBase = (0 - (0 % 3)); var normal = (*reprojectedVertex); if ((smoothFlag == 0u)) { - normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1u)], newVertices[(triBase + 2u)]); + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); } - let outIndex = (baseIndexNext + i); + let outIndex = (baseIndexNext + 0u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #1 + { + let reprojectedVertex = (&newVertices[1i]); + let 
triBase = (1 - (1 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 1u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #2 + { + let reprojectedVertex = (&newVertices[2i]); + let triBase = (2 - (2 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 2u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #3 + { + let reprojectedVertex = (&newVertices[3i]); + let triBase = (3 - (3 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 3u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #4 + { + let reprojectedVertex = (&newVertices[4i]); + let triBase = (4 - (4 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 4u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #5 + { + let reprojectedVertex = (&newVertices[5i]); + let triBase = (5 - (5 % 3)); + var normal = (*reprojectedVertex); + if 
((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 5u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #6 + { + let reprojectedVertex = (&newVertices[6i]); + let triBase = (6 - (6 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 6u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #7 + { + let reprojectedVertex = (&newVertices[7i]); + let triBase = (7 - (7 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 7u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #8 + { + let reprojectedVertex = (&newVertices[8i]); + let triBase = (8 - (8 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 8u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #9 + { + let reprojectedVertex = (&newVertices[9i]); + let triBase = (9 - (9 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = 
getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 9u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #10 + { + let reprojectedVertex = (&newVertices[10i]); + let triBase = (10 - (10 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 10u); + let nextVertex = (&(*nextVertices)[outIndex]); + (*nextVertex).position = packVec2u((*reprojectedVertex)); + (*nextVertex).normal = packVec2u(normal); + } + // unrolled iteration #11 + { + let reprojectedVertex = (&newVertices[11i]); + let triBase = (11 - (11 % 3)); + var normal = (*reprojectedVertex); + if ((smoothFlag == 0u)) { + normal = getAverageNormal(newVertices[triBase], newVertices[(triBase + 1i)], newVertices[(triBase + 2i)]); + } + let outIndex = (baseIndexNext + 11u); let nextVertex = (&(*nextVertices)[outIndex]); (*nextVertex).position = packVec2u((*reprojectedVertex)); (*nextVertex).normal = packVec2u(normal); diff --git a/packages/typegpu/tests/examples/individual/fluid-double-buffering.test.ts b/packages/typegpu/tests/examples/individual/fluid-double-buffering.test.ts index 1bc06bccb2..e4f7c5d35e 100644 --- a/packages/typegpu/tests/examples/individual/fluid-double-buffering.test.ts +++ b/packages/typegpu/tests/examples/individual/fluid-double-buffering.test.ts @@ -36,7 +36,7 @@ describe('fluid double buffering example', () => { @group(0) @binding(1) var obstacles: array; fn isInsideObstacle(x: i32, y: i32) -> bool { - for (var i = 0u; i < 4; i++) { + for (var i = 0u; i < 4u; i++) { let obs = (&obstacles[i]); { if (((*obs).enabled == 0u)) { @@ -131,7 +131,7 @@ describe('fluid double buffering example', () => { @group(0) @binding(3) 
var obstacles: array; fn isInsideObstacle(x: i32, y: i32) -> bool { - for (var i = 0u; i < 4; i++) { + for (var i = 0u; i < 4u; i++) { let obs = (&obstacles[i]); { if (((*obs).enabled == 0u)) { @@ -178,22 +178,73 @@ describe('fluid double buffering example', () => { var leastCost = cell.z; var dirChoices = array(vec2f(), vec2f(), vec2f(), vec2f()); var dirChoiceCount = 1; - for (var i = 0u; i < 4; i++) { - let offset = (&neighborOffsets[i]); - { - var neighborDensity = getCell((x + (*offset).x), (y + (*offset).y)); - let cost = (neighborDensity.z + (f32((*offset).y) * gravityCost)); - if (!isValidFlowOut((x + (*offset).x), (y + (*offset).y))) { - continue; + // unrolled iteration #0 + { + var neighborDensity = getCell((x + neighborOffsets[0u].x), (y + neighborOffsets[0u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[0u].y) * gravityCost)); + if (isValidFlowOut((x + neighborOffsets[0u].x), (y + neighborOffsets[0u].y))) { + if ((cost == leastCost)) { + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[0u].x), f32(neighborOffsets[0u].y)); + dirChoiceCount++; + } + else { + if ((cost < leastCost)) { + leastCost = cost; + dirChoices[0i] = vec2f(f32(neighborOffsets[0u].x), f32(neighborOffsets[0u].y)); + dirChoiceCount = 1i; + } + } + } + } + // unrolled iteration #1 + { + var neighborDensity = getCell((x + neighborOffsets[1u].x), (y + neighborOffsets[1u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[1u].y) * gravityCost)); + if (isValidFlowOut((x + neighborOffsets[1u].x), (y + neighborOffsets[1u].y))) { + if ((cost == leastCost)) { + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[1u].x), f32(neighborOffsets[1u].y)); + dirChoiceCount++; + } + else { + if ((cost < leastCost)) { + leastCost = cost; + dirChoices[0i] = vec2f(f32(neighborOffsets[1u].x), f32(neighborOffsets[1u].y)); + dirChoiceCount = 1i; + } + } + } + } + // unrolled iteration #2 + { + var neighborDensity = getCell((x + neighborOffsets[2u].x), (y + 
neighborOffsets[2u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[2u].y) * gravityCost)); + if (isValidFlowOut((x + neighborOffsets[2u].x), (y + neighborOffsets[2u].y))) { + if ((cost == leastCost)) { + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[2u].x), f32(neighborOffsets[2u].y)); + dirChoiceCount++; } + else { + if ((cost < leastCost)) { + leastCost = cost; + dirChoices[0i] = vec2f(f32(neighborOffsets[2u].x), f32(neighborOffsets[2u].y)); + dirChoiceCount = 1i; + } + } + } + } + // unrolled iteration #3 + { + var neighborDensity = getCell((x + neighborOffsets[3u].x), (y + neighborOffsets[3u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[3u].y) * gravityCost)); + if (isValidFlowOut((x + neighborOffsets[3u].x), (y + neighborOffsets[3u].y))) { if ((cost == leastCost)) { - dirChoices[dirChoiceCount] = vec2f(f32((*offset).x), f32((*offset).y)); + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[3u].x), f32(neighborOffsets[3u].y)); dirChoiceCount++; } else { if ((cost < leastCost)) { leastCost = cost; - dirChoices[0i] = vec2f(f32((*offset).x), f32((*offset).y)); + dirChoices[0i] = vec2f(f32(neighborOffsets[3u].x), f32(neighborOffsets[3u].y)); dirChoiceCount = 1i; } } @@ -311,7 +362,7 @@ describe('fluid double buffering example', () => { @group(0) @binding(3) var obstacles: array; fn isInsideObstacle(x: i32, y: i32) -> bool { - for (var i = 0u; i < 4; i++) { + for (var i = 0u; i < 4u; i++) { let obs = (&obstacles[i]); { if (((*obs).enabled == 0u)) { @@ -358,22 +409,73 @@ describe('fluid double buffering example', () => { var leastCost = cell.z; var dirChoices = array(vec2f(), vec2f(), vec2f(), vec2f()); var dirChoiceCount = 1; - for (var i = 0u; i < 4; i++) { - let offset = (&neighborOffsets[i]); - { - var neighborDensity = getCell((x + (*offset).x), (y + (*offset).y)); - let cost = (neighborDensity.z + (f32((*offset).y) * gravityCost)); - if (!isValidFlowOut((x + (*offset).x), (y + (*offset).y))) { - continue; + // unrolled 
iteration #0 + { + var neighborDensity = getCell((x + neighborOffsets[0u].x), (y + neighborOffsets[0u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[0u].y) * gravityCost)); + if (isValidFlowOut((x + neighborOffsets[0u].x), (y + neighborOffsets[0u].y))) { + if ((cost == leastCost)) { + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[0u].x), f32(neighborOffsets[0u].y)); + dirChoiceCount++; + } + else { + if ((cost < leastCost)) { + leastCost = cost; + dirChoices[0i] = vec2f(f32(neighborOffsets[0u].x), f32(neighborOffsets[0u].y)); + dirChoiceCount = 1i; + } + } + } + } + // unrolled iteration #1 + { + var neighborDensity = getCell((x + neighborOffsets[1u].x), (y + neighborOffsets[1u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[1u].y) * gravityCost)); + if (isValidFlowOut((x + neighborOffsets[1u].x), (y + neighborOffsets[1u].y))) { + if ((cost == leastCost)) { + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[1u].x), f32(neighborOffsets[1u].y)); + dirChoiceCount++; + } + else { + if ((cost < leastCost)) { + leastCost = cost; + dirChoices[0i] = vec2f(f32(neighborOffsets[1u].x), f32(neighborOffsets[1u].y)); + dirChoiceCount = 1i; + } + } + } + } + // unrolled iteration #2 + { + var neighborDensity = getCell((x + neighborOffsets[2u].x), (y + neighborOffsets[2u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[2u].y) * gravityCost)); + if (isValidFlowOut((x + neighborOffsets[2u].x), (y + neighborOffsets[2u].y))) { + if ((cost == leastCost)) { + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[2u].x), f32(neighborOffsets[2u].y)); + dirChoiceCount++; } + else { + if ((cost < leastCost)) { + leastCost = cost; + dirChoices[0i] = vec2f(f32(neighborOffsets[2u].x), f32(neighborOffsets[2u].y)); + dirChoiceCount = 1i; + } + } + } + } + // unrolled iteration #3 + { + var neighborDensity = getCell((x + neighborOffsets[3u].x), (y + neighborOffsets[3u].y)); + let cost = (neighborDensity.z + (f32(neighborOffsets[3u].y) * 
gravityCost)); + if (isValidFlowOut((x + neighborOffsets[3u].x), (y + neighborOffsets[3u].y))) { if ((cost == leastCost)) { - dirChoices[dirChoiceCount] = vec2f(f32((*offset).x), f32((*offset).y)); + dirChoices[dirChoiceCount] = vec2f(f32(neighborOffsets[3u].x), f32(neighborOffsets[3u].y)); dirChoiceCount++; } else { if ((cost < leastCost)) { leastCost = cost; - dirChoices[0i] = vec2f(f32((*offset).x), f32((*offset).y)); + dirChoices[0i] = vec2f(f32(neighborOffsets[3u].x), f32(neighborOffsets[3u].y)); dirChoiceCount = 1i; } } @@ -484,7 +586,7 @@ describe('fluid double buffering example', () => { @group(0) @binding(1) var obstacles: array; fn isInsideObstacle(x: i32, y: i32) -> bool { - for (var i = 0u; i < 4; i++) { + for (var i = 0u; i < 4u; i++) { let obs = (&obstacles[i]); { if (((*obs).enabled == 0u)) { diff --git a/packages/typegpu/tests/examples/individual/jelly-slider.test.ts b/packages/typegpu/tests/examples/individual/jelly-slider.test.ts index 544d5b5ae7..5f0e5daca5 100644 --- a/packages/typegpu/tests/examples/individual/jelly-slider.test.ts +++ b/packages/typegpu/tests/examples/individual/jelly-slider.test.ts @@ -608,9 +608,81 @@ describe('jelly-slider example', () => { var minColor = vec3f(9999); var maxColor = vec3f(-9999); var dimensions = textureDimensions(currentTexture); - for (var x = -1; (x <= 1i); x++) { - for (var y = -1; (y <= 1i); y++) { - var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(x, y)); + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(-1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #1 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(-1, 0)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var 
neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #2 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(-1, 1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(0, -1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #1 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i()); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #2 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(0, 1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + } + // unrolled iteration #2 + { + // unrolled iteration #0 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(1, -1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #1 + { + var sampleCoord = 
(vec2i(_arg_0.gid.xy) + vec2i(1, 0)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #2 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(1)); var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); var neighborColor = textureLoad(currentTexture, clampedCoord, 0); minColor = min(minColor, neighborColor.rgb); diff --git a/packages/typegpu/tests/examples/individual/jelly-switch.test.ts b/packages/typegpu/tests/examples/individual/jelly-switch.test.ts index 8b4760c223..1bc1427683 100644 --- a/packages/typegpu/tests/examples/individual/jelly-switch.test.ts +++ b/packages/typegpu/tests/examples/individual/jelly-switch.test.ts @@ -414,9 +414,81 @@ describe('jelly switch example', () => { var minColor = vec3f(9999); var maxColor = vec3f(-9999); var dimensions = textureDimensions(currentTexture); - for (var x = -1; (x <= 1i); x++) { - for (var y = -1; (y <= 1i); y++) { - var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(x, y)); + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(-1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #1 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(-1, 0)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #2 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(-1, 1)); + var 
clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(0, -1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #1 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i()); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #2 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(0, 1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + } + // unrolled iteration #2 + { + // unrolled iteration #0 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(1, -1)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, neighborColor.rgb); + } + // unrolled iteration #1 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(1, 0)); + var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); + var neighborColor = textureLoad(currentTexture, clampedCoord, 0); + minColor = min(minColor, neighborColor.rgb); + maxColor = max(maxColor, 
neighborColor.rgb); + } + // unrolled iteration #2 + { + var sampleCoord = (vec2i(_arg_0.gid.xy) + vec2i(1)); var clampedCoord = clamp(sampleCoord, vec2i(), (vec2i(dimensions.xy) - vec2i(1))); var neighborColor = textureLoad(currentTexture, clampedCoord, 0); minColor = min(minColor, neighborColor.rgb); diff --git a/packages/typegpu/tests/examples/individual/jump-flood-distance.test.ts b/packages/typegpu/tests/examples/individual/jump-flood-distance.test.ts index 41b9960c3a..a72e565679 100644 --- a/packages/typegpu/tests/examples/individual/jump-flood-distance.test.ts +++ b/packages/typegpu/tests/examples/individual/jump-flood-distance.test.ts @@ -80,9 +80,161 @@ describe('jump flood (distance) example', () => { var bestOutsideCoord = vec2f(-1); var bestInsideDist = 1e+20; var bestOutsideDist = 1e+20; - for (var dy = -1; (dy <= 1i); dy++) { - for (var dx = -1; (dx <= 1i); dx++) { - var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((dx * offset), (dy * offset))); + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((-1i * offset), (-1i * offset))); + if ((sample.inside.x >= 0f)) { + let dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + // unrolled iteration #1 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((-1i * offset), (0i * offset))); + if ((sample.inside.x >= 0f)) { + let dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * 
vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + // unrolled iteration #2 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((-1i * offset), (1i * offset))); + if ((sample.inside.x >= 0f)) { + let dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((0i * offset), (-1i * offset))); + if ((sample.inside.x >= 0f)) { + let dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + // unrolled iteration #1 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((0i * offset), (0i * offset))); + if ((sample.inside.x >= 0f)) { + let dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + // unrolled iteration #2 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((0i * offset), (1i * offset))); + if ((sample.inside.x >= 0f)) { + let 
dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + } + // unrolled iteration #2 + { + // unrolled iteration #0 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((1i * offset), (-1i * offset))); + if ((sample.inside.x >= 0f)) { + let dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + // unrolled iteration #1 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((1i * offset), (0i * offset))); + if ((sample.inside.x >= 0f)) { + let dInside = distance(pos, (sample.inside * vec2f(size))); + if ((dInside < bestInsideDist)) { + bestInsideDist = dInside; + bestInsideCoord = sample.inside; + } + } + if ((sample.outside.x >= 0f)) { + let dOutside = distance(pos, (sample.outside * vec2f(size))); + if ((dOutside < bestOutsideDist)) { + bestOutsideDist = dOutside; + bestOutsideCoord = sample.outside; + } + } + } + // unrolled iteration #2 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((1i * offset), (1i * offset))); if ((sample.inside.x >= 0f)) { let dInside = distance(pos, (sample.inside * vec2f(size))); if ((dInside < bestInsideDist)) { diff --git a/packages/typegpu/tests/examples/individual/jump-flood-voronoi.test.ts b/packages/typegpu/tests/examples/individual/jump-flood-voronoi.test.ts index fbb318b98c..0c82f1a43d 100644 --- 
a/packages/typegpu/tests/examples/individual/jump-flood-voronoi.test.ts +++ b/packages/typegpu/tests/examples/individual/jump-flood-voronoi.test.ts @@ -131,16 +131,111 @@ describe('jump flood (voronoi) example', () => { var size = textureDimensions(readView); var minDist = 1e+20; var bestSample = SampleResult(vec4f(), vec2f(-1)); - for (var dy = -1; (dy <= 1i); dy++) { - for (var dx = -1; (dx <= 1i); dx++) { - var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((dx * offset), (dy * offset))); - if ((sample.coord.x < 0f)) { - continue; + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((-1i * offset), (-1i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } } - let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); - if ((dist < minDist)) { - minDist = dist; - bestSample = sample; + } + // unrolled iteration #1 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((0i * offset), (-1i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } + } + } + // unrolled iteration #2 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((1i * offset), (-1i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } + } + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((-1i * offset), (0i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) 
{ + minDist = dist; + bestSample = sample; + } + } + } + // unrolled iteration #1 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((0i * offset), (0i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } + } + } + // unrolled iteration #2 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((1i * offset), (0i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } + } + } + } + // unrolled iteration #2 + { + // unrolled iteration #0 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((-1i * offset), (1i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } + } + } + // unrolled iteration #1 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((0i * offset), (1i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } + } + } + // unrolled iteration #2 + { + var sample = sampleWithOffset(readView, vec2i(i32(x), i32(y)), vec2i((1i * offset), (1i * offset))); + if ((sample.coord.x >= 0f)) { + let dist = distance(vec2f(f32(x), f32(y)), (sample.coord * vec2f(size))); + if ((dist < minDist)) { + minDist = dist; + bestSample = sample; + } } } } diff --git a/packages/typegpu/tests/examples/individual/liquid-glass.test.ts b/packages/typegpu/tests/examples/individual/liquid-glass.test.ts index 39c3e0995e..5a7e2af782 100644 --- a/packages/typegpu/tests/examples/individual/liquid-glass.test.ts +++ 
b/packages/typegpu/tests/examples/individual/liquid-glass.test.ts @@ -107,9 +107,20 @@ describe('liquid-glass example', () => { fn sampleWithChromaticAberration(tex: texture_2d, sampler2: sampler, uv: vec2f, offset: f32, dir: vec2f, blur: f32) -> vec3f { var samples = array(); - for (var i = 0; (i < 3i); i++) { - var channelOffset = (dir * ((f32(i) - 1f) * offset)); - samples[i] = textureSampleBias(tex, sampler2, (uv - channelOffset), blur).rgb; + // unrolled iteration #0 + { + var channelOffset = (dir * (-1f * offset)); + samples[0i] = textureSampleBias(tex, sampler2, (uv - channelOffset), blur).rgb; + } + // unrolled iteration #1 + { + var channelOffset = (dir * (0f * offset)); + samples[1i] = textureSampleBias(tex, sampler2, (uv - channelOffset), blur).rgb; + } + // unrolled iteration #2 + { + var channelOffset = (dir * (1f * offset)); + samples[2i] = textureSampleBias(tex, sampler2, (uv - channelOffset), blur).rgb; } return vec3f(samples[0i].x, samples[1i].y, samples[2i].z); } diff --git a/packages/typegpu/tests/examples/individual/ripple-cube.test.ts b/packages/typegpu/tests/examples/individual/ripple-cube.test.ts index dbe13f72fd..199a1408ed 100644 --- a/packages/typegpu/tests/examples/individual/ripple-cube.test.ts +++ b/packages/typegpu/tests/examples/individual/ripple-cube.test.ts @@ -478,8 +478,13 @@ describe('ripple-cube example', () => { let material = (&materialUniform); var f0 = mix(vec3f(0.03999999910593033), (*material).albedo, (*material).metallic); var lo = vec3f(); - for (var i = 0; (i < 2i); i++) { - lo = (lo + evaluateLight(p, n, v, lightsUniform[i], (*material), f0)); + // unrolled iteration #0 + { + lo = (lo + evaluateLight(p, n, v, lightsUniform[0i], (*material), f0)); + } + // unrolled iteration #1 + { + lo = (lo + evaluateLight(p, n, v, lightsUniform[1i], (*material), f0)); } var reflectDir = reflect(v, n); var pScaled = (p * 50f); @@ -559,9 +564,81 @@ describe('ripple-cube example', () => { var historyColor = textureLoad(historyTexture, 
coord, 0); var minColor = vec3f(9999); var maxColor = vec3f(-9999); - for (var ox = -1; (ox <= 1i); ox++) { - for (var oy = -1; (oy <= 1i); oy++) { - var sampleCoord = (coord + vec2i(ox, oy)); + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var sampleCoord = (coord + vec2i(-1)); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + // unrolled iteration #1 + { + var sampleCoord = (coord + vec2i(-1, 0)); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + // unrolled iteration #2 + { + var sampleCoord = (coord + vec2i(-1, 1)); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var sampleCoord = (coord + vec2i(0, -1)); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + // unrolled iteration #1 + { + var sampleCoord = (coord + vec2i()); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + // unrolled iteration #2 + { + var sampleCoord = (coord + vec2i(0, 1)); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + } + // unrolled iteration #2 + { + // 
unrolled iteration #0 + { + var sampleCoord = (coord + vec2i(1, -1)); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + // unrolled iteration #1 + { + var sampleCoord = (coord + vec2i(1, 0)); + var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); + var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; + minColor = min(minColor, neighbor); + maxColor = max(maxColor, neighbor); + } + // unrolled iteration #2 + { + var sampleCoord = (coord + vec2i(1)); var clampedCoord = clamp(sampleCoord, vec2i(), vec2i(181)); var neighbor = textureLoad(currentTexture, clampedCoord, 0).rgb; minColor = min(minColor, neighbor); diff --git a/packages/typegpu/tests/examples/individual/slime-mold-3d.test.ts b/packages/typegpu/tests/examples/individual/slime-mold-3d.test.ts index 0f1b46e97e..c2211a2e8f 100644 --- a/packages/typegpu/tests/examples/individual/slime-mold-3d.test.ts +++ b/packages/typegpu/tests/examples/individual/slime-mold-3d.test.ts @@ -184,9 +184,86 @@ describe('slime mold 3d example', () => { var totalWeight = 0f; var perp1 = getPerpendicular(direction); var perp2 = cross(direction, perp1); - const numSamples = 8; - for (var i = 0; (i < numSamples); i++) { - let theta = (((f32(i) / f32(numSamples)) * 2f) * 3.141592653589793f); + // unrolled iteration #0 + { + const theta = 0.; + var coneOffset = ((perp1 * cos(theta)) + (perp2 * sin(theta))); + var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); + var sensorPos = (pos + (sensorDir * params.sensorDistance)); + var sensorPosInt = vec3u(clamp(sensorPos, vec3f(), (dimsf - vec3f(1)))); + let weight = textureLoad(oldState, sensorPosInt).x; + weightedDir = (weightedDir + (sensorDir * weight)); + totalWeight = (totalWeight + weight); + } + // unrolled iteration #1 + { + const theta = 0.7853981633974483; + var coneOffset = 
((perp1 * cos(theta)) + (perp2 * sin(theta))); + var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); + var sensorPos = (pos + (sensorDir * params.sensorDistance)); + var sensorPosInt = vec3u(clamp(sensorPos, vec3f(), (dimsf - vec3f(1)))); + let weight = textureLoad(oldState, sensorPosInt).x; + weightedDir = (weightedDir + (sensorDir * weight)); + totalWeight = (totalWeight + weight); + } + // unrolled iteration #2 + { + const theta = 1.5707963267948966; + var coneOffset = ((perp1 * cos(theta)) + (perp2 * sin(theta))); + var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); + var sensorPos = (pos + (sensorDir * params.sensorDistance)); + var sensorPosInt = vec3u(clamp(sensorPos, vec3f(), (dimsf - vec3f(1)))); + let weight = textureLoad(oldState, sensorPosInt).x; + weightedDir = (weightedDir + (sensorDir * weight)); + totalWeight = (totalWeight + weight); + } + // unrolled iteration #3 + { + const theta = 2.356194490192345; + var coneOffset = ((perp1 * cos(theta)) + (perp2 * sin(theta))); + var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); + var sensorPos = (pos + (sensorDir * params.sensorDistance)); + var sensorPosInt = vec3u(clamp(sensorPos, vec3f(), (dimsf - vec3f(1)))); + let weight = textureLoad(oldState, sensorPosInt).x; + weightedDir = (weightedDir + (sensorDir * weight)); + totalWeight = (totalWeight + weight); + } + // unrolled iteration #4 + { + const theta = 3.141592653589793; + var coneOffset = ((perp1 * cos(theta)) + (perp2 * sin(theta))); + var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); + var sensorPos = (pos + (sensorDir * params.sensorDistance)); + var sensorPosInt = vec3u(clamp(sensorPos, vec3f(), (dimsf - vec3f(1)))); + let weight = textureLoad(oldState, sensorPosInt).x; + weightedDir = (weightedDir + (sensorDir * weight)); + totalWeight = (totalWeight + weight); + } + // unrolled iteration #5 + { + const theta = 
3.9269908169872414; + var coneOffset = ((perp1 * cos(theta)) + (perp2 * sin(theta))); + var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); + var sensorPos = (pos + (sensorDir * params.sensorDistance)); + var sensorPosInt = vec3u(clamp(sensorPos, vec3f(), (dimsf - vec3f(1)))); + let weight = textureLoad(oldState, sensorPosInt).x; + weightedDir = (weightedDir + (sensorDir * weight)); + totalWeight = (totalWeight + weight); + } + // unrolled iteration #6 + { + const theta = 4.71238898038469; + var coneOffset = ((perp1 * cos(theta)) + (perp2 * sin(theta))); + var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); + var sensorPos = (pos + (sensorDir * params.sensorDistance)); + var sensorPosInt = vec3u(clamp(sensorPos, vec3f(), (dimsf - vec3f(1)))); + let weight = textureLoad(oldState, sensorPosInt).x; + weightedDir = (weightedDir + (sensorDir * weight)); + totalWeight = (totalWeight + weight); + } + // unrolled iteration #7 + { + const theta = 5.497787143782138; var coneOffset = ((perp1 * cos(theta)) + (perp2 * sin(theta))); var sensorDir = normalize((direction + (coneOffset * sin(params.sensorAngle)))); var sensorPos = (pos + (sensorDir * params.sensorDistance)); diff --git a/packages/typegpu/tests/examples/individual/slime-mold.test.ts b/packages/typegpu/tests/examples/individual/slime-mold.test.ts index 9cdf638876..42f5f6beab 100644 --- a/packages/typegpu/tests/examples/individual/slime-mold.test.ts +++ b/packages/typegpu/tests/examples/individual/slime-mold.test.ts @@ -93,9 +93,97 @@ describe('slime mold example', () => { } var sum = vec3f(); var count = 0f; - for (var offsetY = -1; (offsetY <= 1i); offsetY++) { - for (var offsetX = -1; (offsetX <= 1i); offsetX++) { - var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(offsetX, offsetY)); + // unrolled iteration #0 + { + // unrolled iteration #0 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(-1)); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && 
(samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + // unrolled iteration #1 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(0, -1)); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + // unrolled iteration #2 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(1, -1)); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + } + // unrolled iteration #1 + { + // unrolled iteration #0 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(-1, 0)); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + // unrolled iteration #1 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i()); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + // unrolled iteration #2 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(1, 0)); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + } + // unrolled iteration #2 + { + // unrolled iteration #0 + { + var 
samplePos = (vec2i(_arg_0.gid.xy) + vec2i(-1, 1)); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + // unrolled iteration #1 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(0, 1)); + var dimsi = vec2i(dims); + if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { + var color = textureLoad(oldState, vec2u(samplePos)).rgb; + sum = (sum + color); + count = (count + 1f); + } + } + // unrolled iteration #2 + { + var samplePos = (vec2i(_arg_0.gid.xy) + vec2i(1)); var dimsi = vec2i(dims); if (((((samplePos.x >= 0i) && (samplePos.x < dimsi.x)) && (samplePos.y >= 0i)) && (samplePos.y < dimsi.y))) { var color = textureLoad(oldState, vec2u(samplePos)).rgb; diff --git a/packages/typegpu/tests/examples/individual/stable-fluid.test.ts b/packages/typegpu/tests/examples/individual/stable-fluid.test.ts index 040a569566..a82228bd28 100644 --- a/packages/typegpu/tests/examples/individual/stable-fluid.test.ts +++ b/packages/typegpu/tests/examples/individual/stable-fluid.test.ts @@ -62,8 +62,21 @@ describe('stable-fluid example', () => { fn getNeighbors(coords: vec2i, bounds: vec2i) -> array { var adjacentOffsets = array(vec2i(-1, 0), vec2i(0, -1), vec2i(1, 0), vec2i(0, 1)); - for (var i = 0; (i < 4i); i++) { - adjacentOffsets[i] = clamp((coords + adjacentOffsets[i]), vec2i(), (bounds - vec2i(1))); + // unrolled iteration #0 + { + adjacentOffsets[0i] = clamp((coords + adjacentOffsets[0i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #1 + { + adjacentOffsets[1i] = clamp((coords + adjacentOffsets[1i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #2 + { + adjacentOffsets[2i] = clamp((coords + adjacentOffsets[2i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #3 + { + 
adjacentOffsets[3i] = clamp((coords + adjacentOffsets[3i]), vec2i(), (bounds - vec2i(1))); } return adjacentOffsets; } @@ -102,8 +115,21 @@ describe('stable-fluid example', () => { fn getNeighbors(coords: vec2i, bounds: vec2i) -> array { var adjacentOffsets = array(vec2i(-1, 0), vec2i(0, -1), vec2i(1, 0), vec2i(0, 1)); - for (var i = 0; (i < 4i); i++) { - adjacentOffsets[i] = clamp((coords + adjacentOffsets[i]), vec2i(), (bounds - vec2i(1))); + // unrolled iteration #0 + { + adjacentOffsets[0i] = clamp((coords + adjacentOffsets[0i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #1 + { + adjacentOffsets[1i] = clamp((coords + adjacentOffsets[1i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #2 + { + adjacentOffsets[2i] = clamp((coords + adjacentOffsets[2i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #3 + { + adjacentOffsets[3i] = clamp((coords + adjacentOffsets[3i]), vec2i(), (bounds - vec2i(1))); } return adjacentOffsets; } @@ -130,8 +156,21 @@ describe('stable-fluid example', () => { fn getNeighbors(coords: vec2i, bounds: vec2i) -> array { var adjacentOffsets = array(vec2i(-1, 0), vec2i(0, -1), vec2i(1, 0), vec2i(0, 1)); - for (var i = 0; (i < 4i); i++) { - adjacentOffsets[i] = clamp((coords + adjacentOffsets[i]), vec2i(), (bounds - vec2i(1))); + // unrolled iteration #0 + { + adjacentOffsets[0i] = clamp((coords + adjacentOffsets[0i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #1 + { + adjacentOffsets[1i] = clamp((coords + adjacentOffsets[1i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #2 + { + adjacentOffsets[2i] = clamp((coords + adjacentOffsets[2i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #3 + { + adjacentOffsets[3i] = clamp((coords + adjacentOffsets[3i]), vec2i(), (bounds - vec2i(1))); } return adjacentOffsets; } @@ -161,8 +200,21 @@ describe('stable-fluid example', () => { fn getNeighbors(coords: vec2i, bounds: vec2i) -> array { var adjacentOffsets = 
array(vec2i(-1, 0), vec2i(0, -1), vec2i(1, 0), vec2i(0, 1)); - for (var i = 0; (i < 4i); i++) { - adjacentOffsets[i] = clamp((coords + adjacentOffsets[i]), vec2i(), (bounds - vec2i(1))); + // unrolled iteration #0 + { + adjacentOffsets[0i] = clamp((coords + adjacentOffsets[0i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #1 + { + adjacentOffsets[1i] = clamp((coords + adjacentOffsets[1i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #2 + { + adjacentOffsets[2i] = clamp((coords + adjacentOffsets[2i]), vec2i(), (bounds - vec2i(1))); + } + // unrolled iteration #3 + { + adjacentOffsets[3i] = clamp((coords + adjacentOffsets[3i]), vec2i(), (bounds - vec2i(1))); } return adjacentOffsets; } diff --git a/packages/typegpu/tests/tgsl/wgslGenerator.test.ts b/packages/typegpu/tests/tgsl/wgslGenerator.test.ts index ae5e4bf6ff..45e8123b71 100644 --- a/packages/typegpu/tests/tgsl/wgslGenerator.test.ts +++ b/packages/typegpu/tests/tgsl/wgslGenerator.test.ts @@ -487,7 +487,7 @@ describe('wgslGenerator', () => { "fn main() { var arr = array(1f, 2f, 3f); var res = 0f; - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let foo = arr[i]; { res += foo; @@ -513,10 +513,10 @@ describe('wgslGenerator', () => { "fn main() { var arr = array(1f, 2f, 3f); var res = 0f; - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let foo = arr[i]; { - for (var i_1 = 0u; i_1 < 3; i_1++) { + for (var i_1 = 0u; i_1 < 3u; i_1++) { let boo = arr[i_1]; { res += (foo * boo); @@ -544,10 +544,10 @@ describe('wgslGenerator', () => { "fn main() { var arr = array(1f, 2f, 3f); var res = 0f; - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let foo = arr[i]; { - for (var i_1 = 0u; i_1 < 3; i_1++) { + for (var i_1 = 0u; i_1 < 3u; i_1++) { let foo2 = arr[i_1]; { res += (foo2 * foo2); @@ -573,7 +573,7 @@ describe('wgslGenerator', () => { "fn main() { var arr = array(vec2f(1), vec2f(2), vec2f(3)); var res = 0; - for (var i = 0u; i < 3; i++) { + 
for (var i = 0u; i < 3u; i++) { let foo = (&arr[i]); { res += i32((*foo).x); @@ -630,14 +630,14 @@ describe('wgslGenerator', () => { expect(tgpu.resolve([main])).toMatchInlineSnapshot(` "fn main() { var v1 = vec4u(44, 88, 132, 176); - for (var i = 0u; i < 4; i++) { + for (var i = 0u; i < 4u; i++) { let foo = v1[i]; { continue; } } var v2 = vec2f(1, 2); - for (var i = 0u; i < 2; i++) { + for (var i = 0u; i < 2u; i++) { let foo = v2[i]; { continue; @@ -647,6 +647,36 @@ describe('wgslGenerator', () => { `); }); + it('creates correct code for "for ... of ..." statements using buffer iterable', ({ root }) => { + const b = root.createUniform(d.arrayOf(d.u32, 7)); + const acc = tgpu.accessor(d.arrayOf(d.u32, 7), b); + + const f = () => { + 'use gpu'; + let result = d.u32(0); + for (const foo of acc.$) { + result += foo; + } + + return result; + }; + + expect(tgpu.resolve([f])).toMatchInlineSnapshot(` + "@group(0) @binding(0) var b: array; + + fn f() -> u32 { + var result = 0u; + for (var i = 0u; i < 7u; i++) { + let foo = b[i]; + { + result += foo; + } + } + return result; + }" + `); + }); + it('creates correct code for "for ... of ..." 
statements using vector iterables', () => { const main = () => { 'use gpu'; @@ -679,19 +709,19 @@ describe('wgslGenerator', () => { var res1 = 0f; var res2 = 0u; var res3 = false; - for (var i = 0u; i < 4; i++) { + for (var i = 0u; i < 4u; i++) { let foo = v1[i]; { res1 += foo; } } - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let foo = v2[i]; { res2 *= foo; } } - for (var i = 0u; i < 2; i++) { + for (var i = 0u; i < 2u; i++) { let foo = v3[i]; { res3 = (foo != res3); @@ -721,7 +751,7 @@ describe('wgslGenerator', () => { fn main() { var testStruct = TestStruct(array(1f, 8f, 8f, 2f)); - for (var i = 0u; i < 4; i++) { + for (var i = 0u; i < 4u; i++) { let foo = testStruct.arr[i]; { continue; @@ -743,7 +773,10 @@ describe('wgslGenerator', () => { [Error: Resolution of the following tree failed: - - fn*:main - - fn*:main(): \`for ... of ...\` loops only support iterables stored in variables] + - fn*:main(): \`for ... of ...\` loops only support iterables stored in variables. + ----- + You can wrap iterable with \`tgpu.unroll(...)\`. If iterable is known at comptime, the loop will be unrolled. 
+ -----] `); }); @@ -816,7 +849,7 @@ describe('wgslGenerator', () => { expect(tgpu.resolve([f1])).toMatchInlineSnapshot(` "fn f1() { var arr = array(1, 2, 3); - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let foo = arr[i]; { let i_1 = foo; @@ -838,7 +871,7 @@ describe('wgslGenerator', () => { "fn f2() { const i = 7; var arr = array(1, 2, 3); - for (var i_1 = 0u; i_1 < 3; i_1++) { + for (var i_1 = 0u; i_1 < 3u; i_1++) { let foo = arr[i_1]; { continue; @@ -864,7 +897,7 @@ describe('wgslGenerator', () => { fn f() { var arr = array(1u, 2u, 3u, i); - for (var i_1 = 0u; i_1 < 4; i_1++) { + for (var i_1 = 0u; i_1 < 4u; i_1++) { let foo = arr[i_1]; { continue; @@ -889,7 +922,7 @@ describe('wgslGenerator', () => { fn f() { var arr = array(1, 2, 3); - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let foo = arr[i]; { let x = (foo + i32(i_1)); @@ -917,7 +950,7 @@ describe('wgslGenerator', () => { fn f() { var arr = array(1, 2, 3); - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let foo = arr[i]; { let x = (foo + i32(i_1)); @@ -941,7 +974,7 @@ describe('wgslGenerator', () => { "fn f() { var arr = array(1, 2, 3); var res = 0; - for (var i = 0u; i < 3; i++) { + for (var i = 0u; i < 3u; i++) { let i_1 = arr[i]; { res += i_1; @@ -1079,11 +1112,11 @@ describe('wgslGenerator', () => { }); expect(tgpu.resolve([testFn])).toMatchInlineSnapshot(` - "fn testFn() -> u32 { - var arr = array(1u, 2u, 3u); - return arr[1i]; - }" - `); + "fn testFn() -> u32 { + var arr = array(1u, 2u, 3u); + return arr[1i]; + }" + `); const astInfo = getMetaData( testFn[$internal].implementation as (...args: unknown[]) => unknown, diff --git a/packages/typegpu/tests/unroll.test.ts b/packages/typegpu/tests/unroll.test.ts new file mode 100644 index 0000000000..d507b61da0 --- /dev/null +++ b/packages/typegpu/tests/unroll.test.ts @@ -0,0 +1,843 @@ +import { describe, expect, vi } from 'vitest'; +import { it } from './utils/extendedIt.ts'; +import * as d 
from '../src/data/index.ts';
+import tgpu from '../src/index.ts';
+
+/**
+ * Test suite for `tgpu.unroll`.
+ *
+ * Each case resolves a `'use gpu'` function with `tgpu.resolve` and pins the
+ * generated WGSL (or the thrown resolution error) with an inline snapshot.
+ * The bodies of the `'use gpu'` functions are the input under test, so their
+ * statements must not be reordered or "tidied up".
+ *
+ * NOTE(review): in this view several snapshots look like they lost
+ * angle-bracketed text (`array` without an element type, a bare `-` entry in
+ * the error tree, `d.Infer` without a type argument) - confirm against the
+ * committed file before regenerating any snapshot.
+ */
+describe('tgpu.unroll', () => {
+  // In plain JS, `tgpu.unroll` hands back the very same object it was given.
+  it('called outside the gpu function returns passed iterable', () => {
+    const arr = [1, 2, 3];
+    const x = tgpu.unroll(arr);
+
+    expect(x).toBe(arr);
+  });
+
+  // Outside of a `for...of` header the call is transparent in generated code.
+  it('called inside the gpu function but outside of forOf returns passed iterable', () => {
+    const layout = tgpu.bindGroupLayout({
+      arr: { storage: d.arrayOf(d.f32) },
+    });
+
+    const f = () => {
+      'use gpu';
+      const a = tgpu.unroll([1, 2, 3]);
+
+      const v1 = d.vec2f(7);
+      const v2 = tgpu.unroll(v1); // this should return a pointer
+      const arr = tgpu.unroll(layout.$.arr); // this should return a pointer
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "@group(0) @binding(0) var arr_1: array;
+
+      fn f() {
+        var a = array(1, 2, 3);
+        var v1 = vec2f(7);
+        let v2 = (&v1);
+        let arr = (&arr_1);
+      }"
+    `);
+  });
+
+  // An inline array literal is expanded into one block per element.
+  it('unrolls array expression of primitives', () => {
+    const f = () => {
+      'use gpu';
+      let res = 0;
+      for (const foo of tgpu.unroll([1, 2, 3])) {
+        res += foo;
+      }
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> i32 {
+        var res = 0;
+        // unrolled iteration #0
+        {
+          res += 1i;
+        }
+        // unrolled iteration #1
+        {
+          res += 2i;
+        }
+        // unrolled iteration #2
+        {
+          res += 3i;
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // The loop variable shadows an outer `foo`; the body must see the element.
+  it('unrolls correctly when loop variable is overriding', () => {
+    const f = () => {
+      'use gpu';
+      const foo = d.vec3f(6);
+      for (const foo of tgpu.unroll([1, 2])) {
+        const boo = foo;
+      }
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() {
+        var foo = vec3f(6);
+        // unrolled iteration #0
+        {
+          const boo = 1;
+        }
+        // unrolled iteration #1
+        {
+          const boo = 2;
+        }
+      }"
+    `);
+  });
+
+  // A nested block redeclares `foo`; after it, `foo` is the loop element again.
+  it('unrolls correctly when loop variable is overridden', () => {
+    const f = () => {
+      'use gpu';
+      let fooResult = d.f32(0);
+      for (const foo of tgpu.unroll([1, 2])) {
+        const boo = foo;
+        {
+          const foo = boo;
+          fooResult += foo;
+        }
+        const bar = foo;
+      }
+
+      return fooResult;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> f32 {
+        var fooResult = 0f;
+        // unrolled iteration #0
+        {
+          const boo = 1;
+          {
+            const foo2 = boo;
+            fooResult += f32(foo2);
+          }
+          const bar = 1;
+        }
+        // unrolled iteration #1
+        {
+          const boo = 2;
+          {
+            const foo2 = boo;
+            fooResult += f32(foo2);
+          }
+          const bar = 2;
+        }
+        return fooResult;
+      }"
+    `);
+  });
+
+  // Elements that are named struct variables are referenced directly / by pointer.
+  it('unrolls array expression of complex types', () => {
+    const Boid = d.struct({
+      pos: d.vec2i,
+      vel: d.vec2f,
+    });
+
+    const f = () => {
+      'use gpu';
+      const b1 = Boid({ pos: d.vec2i(1), vel: d.vec2f(1) });
+      const b2 = Boid({ pos: d.vec2i(2), vel: d.vec2f(2) });
+      let res = d.vec2f();
+      for (const foo of tgpu.unroll([b1, b2])) {
+        const boo = foo;
+        res = res.add(foo.vel);
+        boo.pos = d.vec2i();
+      }
+
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "struct Boid {
+        pos: vec2i,
+        vel: vec2f,
+      }
+
+      fn f() -> vec2f {
+        var b1 = Boid(vec2i(1), vec2f(1));
+        var b2 = Boid(vec2i(2), vec2f(2));
+        var res = vec2f();
+        // unrolled iteration #0
+        {
+          let boo = (&b1);
+          res = (res + b1.vel);
+          (*boo).pos = vec2i();
+        }
+        // unrolled iteration #1
+        {
+          let boo = (&b2);
+          res = (res + b2.vel);
+          (*boo).pos = vec2i();
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // A struct constructed inline in the iterable is rejected.
+  // The snapshot below mirrors the thrown message verbatim (including its
+  // "emphemeral" spelling), so that text can only be corrected where the
+  // error is raised, not here.
+  it('throws when iterable elements are ephemeral but not naturally ephemeral', () => {
+    const Boid = d.struct({
+      pos: d.vec2i,
+      vel: d.vec2f,
+    });
+
+    const f = () => {
+      'use gpu';
+      for (const foo of tgpu.unroll([Boid()])) {
+        const boo = foo;
+      }
+    };
+
+    expect(() => tgpu.resolve([f])).toThrowErrorMatchingInlineSnapshot(`
+      [Error: Resolution of the following tree failed:
+      -
+      - fn*:f
+      - fn*:f(): Cannot unroll loop. The elements of iterable are emphemeral but not naturally ephemeral.]
+    `);
+  });
+
+  // Keys of a JS object drive the loop; each lookup is folded to its value.
+  it('unrolls array expression of struct field names - (simple)', () => {
+    const values = { a: 1, b: 2, c: 3 };
+    const list = Object.keys(values) as (keyof typeof values)[];
+
+    const f = () => {
+      'use gpu';
+      let result = d.u32(0);
+      for (const prop of tgpu.unroll(list)) {
+        result += values[prop];
+      }
+      return result;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> u32 {
+        var result = 0u;
+        // unrolled iteration #0
+        {
+          result += 1u;
+        }
+        // unrolled iteration #1
+        {
+          result += 2u;
+        }
+        // unrolled iteration #2
+        {
+          result += 3u;
+        }
+        return result;
+      }"
+    `);
+  });
+
+  // The same key indexes both a GPU struct field and a JS map of gpu functions.
+  it('unrolls array expression of struct field names - (complex)', () => {
+    const variants = {
+      foo: (x: number) => {
+        'use gpu';
+        return 6 * x;
+      },
+      boo: (x: number) => {
+        'use gpu';
+        return 7 * x;
+      },
+    };
+
+    const Weights = d.struct(Object.fromEntries(
+      Object.keys(variants).map((name) => [name, d.f32]),
+    ));
+
+    const variantsKey = Object.keys(variants) as (keyof typeof variants)[];
+
+    const computeWeight = tgpu.fn([Weights], d.f32)(
+      (weights: d.Infer) => {
+        'use gpu';
+
+        let p = d.f32(0);
+        for (const key of tgpu.unroll(variantsKey)) {
+          // @ts-expect-error: trust me
+          p += weights[key] * variants[key](p);
+        }
+        return p;
+      },
+    );
+
+    expect(tgpu.resolve([computeWeight])).toMatchInlineSnapshot(`
+      "fn foo(x: f32) -> f32 {
+        return (6f * x);
+      }
+
+      fn boo(x: f32) -> f32 {
+        return (7f * x);
+      }
+
+      struct Weights {
+        foo: f32,
+        boo: f32,
+      }
+
+      fn computeWeight(weights: Weights) -> f32 {
+        var p = 0f;
+        // unrolled iteration #0
+        {
+          p += (weights.foo * foo(p));
+        }
+        // unrolled iteration #1
+        {
+          p += (weights.boo * boo(p));
+        }
+        return p;
+      }"
+    `);
+  });
+
+  // Named vector variables as elements: aliases become pointers to them.
+  it('unrolls array expression of pointers', () => {
+    const f = () => {
+      'use gpu';
+      let res = d.vec2f();
+      const v1 = d.vec2f(7);
+      const v2 = d.vec2f(3);
+      for (const foo of tgpu.unroll([v1, v2])) {
+        const boo = foo;
+        res = res.add(foo);
+        boo.x = 6;
+      }
+
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> vec2f {
+        var res = vec2f();
+        var v1 = vec2f(7);
+        var v2 = vec2f(3);
+        // unrolled iteration #0
+        {
+          let boo = (&v1);
+          res = (res + v1);
+          (*boo).x = 6f;
+        }
+        // unrolled iteration #1
+        {
+          let boo = (&v2);
+          res = (res + v2);
+          (*boo).x = 6f;
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // A vector built from literals is iterated component by component.
+  it('unrolls ephemeral vector - (instance)', () => {
+    const f = () => {
+      'use gpu';
+      let res = d.u32(0);
+      for (const foo of tgpu.unroll(d.vec4u(1, 2, 3, 4))) {
+        res += foo;
+      }
+
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> u32 {
+        var res = 0u;
+        // unrolled iteration #0
+        {
+          res += 1u;
+        }
+        // unrolled iteration #1
+        {
+          res += 2u;
+        }
+        // unrolled iteration #2
+        {
+          res += 3u;
+        }
+        // unrolled iteration #3
+        {
+          res += 4u;
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // A vector copied from a runtime variable: the snapshot indexes the source `v`.
+  it('unrolls ephemeral vector - (string)', () => {
+    const f = () => {
+      'use gpu';
+
+      const v = d.vec3f(7);
+
+      let res = 0;
+      for (const pos of tgpu.unroll(d.vec3f(v))) {
+        res = res + pos;
+      }
+
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> i32 {
+        var v = vec3f(7);
+        var res = 0;
+        // unrolled iteration #0
+        {
+          res = i32((f32(res) + v[0u]));
+        }
+        // unrolled iteration #1
+        {
+          res = i32((f32(res) + v[1u]));
+        }
+        // unrolled iteration #2
+        {
+          res = i32((f32(res) + v[2u]));
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // An array captured from the enclosing JS scope is inlined by value.
+  it('unrolls external comptime iterable', () => {
+    const arr = [1, 2, 3];
+
+    const f = () => {
+      'use gpu';
+      let result = 0;
+      for (const foo of tgpu.unroll(arr)) {
+        result += foo;
+      }
+
+      return result;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> i32 {
+        var result = 0;
+        // unrolled iteration #0
+        {
+          result += 1i;
+        }
+        // unrolled iteration #1
+        {
+          result += 2i;
+        }
+        // unrolled iteration #2
+        {
+          result += 3i;
+        }
+        return result;
+      }"
+    `);
+  });
+
+  // `d.arrayOf(d.f32)` is declared without a length, so unrolling must fail.
+  it('throws when iterable is unknown at comptime', () => {
+    const layout = tgpu.bindGroupLayout({
+      arr: { storage: d.arrayOf(d.f32) },
+    });
+
+    const f = () => {
+      'use gpu';
+      let res = d.f32(0);
+      for (const foo of tgpu.unroll(layout.$.arr)) {
+        res += foo;
+      }
+    };
+
+    expect(() => tgpu.resolve([f])).toThrowErrorMatchingInlineSnapshot(`
+      [Error: Resolution of the following tree failed:
+      -
+      - fn*:f
+      - fn*:f(): Cannot unroll loop. Length of iterable is unknown at comptime.]
+    `);
+  });
+
+  // A GPU-side array variable is unrolled into constant-index accesses.
+  it('unrolls named iterable of primitives', () => {
+    const f = () => {
+      'use gpu';
+      const arr = [1, 2, 3];
+      let res = d.f32(0);
+      for (const foo of tgpu.unroll(arr)) {
+        res += foo;
+      }
+
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> f32 {
+        var arr = array(1, 2, 3);
+        var res = 0f;
+        // unrolled iteration #0
+        {
+          res += f32(arr[0u]);
+        }
+        // unrolled iteration #1
+        {
+          res += f32(arr[1u]);
+        }
+        // unrolled iteration #2
+        {
+          res += f32(arr[2u]);
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // Writes through the loop variable land on the indexed array element.
+  it('unrolls named iterable of vectors', () => {
+    const f = () => {
+      'use gpu';
+
+      const v1 = d.vec2f(1);
+      const v2 = d.vec2f(8);
+      const v3 = d.vec2f(2);
+      const arr = d.arrayOf(d.vec2f, 4)([v1, v2, v2, v3]);
+      let res = d.vec2f();
+
+      for (const foo of tgpu.unroll(arr)) {
+        res = res.add(foo);
+        foo.x = 7;
+      }
+
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> vec2f {
+        var v1 = vec2f(1);
+        var v2 = vec2f(8);
+        var v3 = vec2f(2);
+        var arr = array(v1, v2, v2, v3);
+        var res = vec2f();
+        // unrolled iteration #0
+        {
+          res = (res + arr[0u]);
+          arr[0u].x = 7f;
+        }
+        // unrolled iteration #1
+        {
+          res = (res + arr[1u]);
+          arr[1u].x = 7f;
+        }
+        // unrolled iteration #2
+        {
+          res = (res + arr[2u]);
+          arr[2u].x = 7f;
+        }
+        // unrolled iteration #3
+        {
+          res = (res + arr[3u]);
+          arr[3u].x = 7f;
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // Same as above, with struct elements and a nested field write.
+  it('unrolls named iterable of complex types', () => {
+    const Boid = d.struct({
+      pos: d.vec2i,
+      vel: d.vec2f,
+    });
+
+    const f = () => {
+      'use gpu';
+      const b1 = Boid({ pos: d.vec2i(1), vel: d.vec2f(1) });
+      const b2 = Boid({ pos: d.vec2i(2), vel: d.vec2f(2) });
+      const arr = d.arrayOf(Boid, 2)([b1, b2]);
+      let res = d.vec2f();
+
+      for (const foo of tgpu.unroll(arr)) {
+        res = res.add(foo.vel);
+        foo.pos.x = 7;
+      }
+
+      return res;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "struct Boid {
+        pos: vec2i,
+        vel: vec2f,
+      }
+
+      fn f() -> vec2f {
+        var b1 = Boid(vec2i(1), vec2f(1));
+        var b2 = Boid(vec2i(2), vec2f(2));
+        var arr = array(b1, b2);
+        var res = vec2f();
+        // unrolled iteration #0
+        {
+          res = (res + arr[0u].vel);
+          arr[0u].pos.x = 7i;
+        }
+        // unrolled iteration #1
+        {
+          res = (res + arr[1u].vel);
+          arr[1u].pos.x = 7i;
+        }
+        return res;
+      }"
+    `);
+  });
+
+  // A fixed-length (7) uniform array reached through an accessor is unrolled.
+  it('unrolls buffer iterable', ({ root }) => {
+    const b = root.createUniform(d.arrayOf(d.u32, 7));
+    const acc = tgpu.accessor(d.arrayOf(d.u32, 7), b);
+
+    const f = () => {
+      'use gpu';
+      let result = d.u32(0);
+      for (const foo of tgpu.unroll(acc.$)) {
+        result += foo;
+      }
+
+      return result;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "@group(0) @binding(0) var b: array;
+
+      fn f() -> u32 {
+        var result = 0u;
+        // unrolled iteration #0
+        {
+          result += b[0u];
+        }
+        // unrolled iteration #1
+        {
+          result += b[1u];
+        }
+        // unrolled iteration #2
+        {
+          result += b[2u];
+        }
+        // unrolled iteration #3
+        {
+          result += b[3u];
+        }
+        // unrolled iteration #4
+        {
+          result += b[4u];
+        }
+        // unrolled iteration #5
+        {
+          result += b[5u];
+        }
+        // unrolled iteration #6
+        {
+          result += b[6u];
+        }
+        return result;
+      }"
+    `);
+  });
+
+  // A boolean accessor picks between the unrolled and the regular loop form.
+  it('can be conditionally applied', () => {
+    const unroll = tgpu.accessor(d.bool, true);
+
+    const f = () => {
+      'use gpu';
+      const arr = [1, 2, 3];
+      let r = d.f32(0);
+      for (const foo of (unroll.$ ? tgpu.unroll(arr) : arr)) {
+        r += foo;
+      }
+    };
+
+    // Default accessor value (`true`): unrolled output.
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() {
+        var arr = array(1, 2, 3);
+        var r = 0f;
+        // unrolled iteration #0
+        {
+          r += f32(arr[0u]);
+        }
+        // unrolled iteration #1
+        {
+          r += f32(arr[1u]);
+        }
+        // unrolled iteration #2
+        {
+          r += f32(arr[2u]);
+        }
+      }"
+    `);
+    // Accessor rebound to `false`: an ordinary indexed loop is emitted.
+    expect(tgpu.resolve([tgpu.fn(f).with(unroll, false)]))
+      .toMatchInlineSnapshot(`
+      "fn f() {
+        var arr = array(1, 2, 3);
+        var r = 0f;
+        for (var i = 0u; i < 3u; i++) {
+          let foo = arr[i];
+          {
+            r += f32(foo);
+          }
+        }
+      }"
+    `);
+  });
+
+  // `continue` / `break` directly in an unrolled body are resolution errors.
+  it('throws when `continue` or `break` is used inside the loop body', () => {
+    const f1 = () => {
+      'use gpu';
+      for (const foo of tgpu.unroll([1, 2])) {
+        continue;
+      }
+    };
+
+    expect(() => tgpu.resolve([f1])).toThrowErrorMatchingInlineSnapshot(`
+      [Error: Resolution of the following tree failed:
+      -
+      - fn*:f1
+      - fn*:f1(): Cannot unroll loop containing \`continue\`]
+    `);
+
+    const f2 = () => {
+      'use gpu';
+      for (const foo of tgpu.unroll([1, 2])) {
+        break;
+      }
+    };
+
+    expect(() => tgpu.resolve([f2])).toThrowErrorMatchingInlineSnapshot(`
+      [Error: Resolution of the following tree failed:
+      -
+      - fn*:f2
+      - fn*:f2(): Cannot unroll loop containing \`break\`]
+    `);
+  });
+
+  // Nesting the `continue` inside a block and an `if` does not hide it.
+  it('throws when `continue` is used in nested blocks', () => {
+    const f = () => {
+      'use gpu';
+      for (const foo of tgpu.unroll([1, 2])) {
+        const boo = foo;
+        {
+          if (boo === foo) {
+            continue;
+          }
+        }
+      }
+    };
+
+    expect(() => tgpu.resolve([f])).toThrowErrorMatchingInlineSnapshot(`
+      [Error: Resolution of the following tree failed:
+      -
+      - fn*:f
+      - fn*:f(): Cannot unroll loop containing \`continue\`]
+    `);
+  });
+
+  // `continue` / `break` that belong to an inner, non-unrolled loop are fine.
+  it('unrolls when `continue` or `break` is used in nested loop', () => {
+    const f = () => {
+      'use gpu';
+      const arr = [1, 2, 3];
+
+      for (const foo of tgpu.unroll([1, 2])) {
+        for (let i = 0; i < 2; i++) {
+          if (i === foo) {
+            continue;
+          }
+        }
+        let i = 2;
+        while (i > 2) {
+          i--;
+          break;
+        }
+
+        for (const boo of arr) {
+          continue;
+        }
+      }
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() {
+        var arr = array(1, 2, 3);
+        // unrolled iteration #0
+        {
+          for (var i2 = 0; (i2 < 2i); i2++) {
+            if ((i2 == 1i)) {
+              continue;
+            }
+          }
+          var i = 2;
+          while ((i > 2i)) {
+            i--;
+            break;
+          }
+          for (var i_1 = 0u; i_1 < 3u; i_1++) {
+            let boo = arr[i_1];
+            {
+              continue;
+            }
+          }
+        }
+        // unrolled iteration #1
+        {
+          for (var i2 = 0; (i2 < 2i); i2++) {
+            if ((i2 == 2i)) {
+              continue;
+            }
+          }
+          var i = 2;
+          while ((i > 2i)) {
+            i--;
+            break;
+          }
+          for (var i_1 = 0u; i_1 < 3u; i_1++) {
+            let boo = arr[i_1];
+            {
+              continue;
+            }
+          }
+        }
+      }"
+    `);
+  });
+
+  // After an inner regular loop ends, a `break` in the outer unrolled body
+  // must still be rejected.
+  it('unrolling flag is set correctly', () => {
+    const f = () => {
+      'use gpu';
+      const arr = [1, 2, 3];
+
+      for (const foo of tgpu.unroll([1, 2])) {
+        for (const boo of arr) {
+          continue;
+        }
+        break;
+      }
+    };
+
+    expect(() => tgpu.resolve([f])).toThrowErrorMatchingInlineSnapshot(`
+      [Error: Resolution of the following tree failed:
+      -
+      - fn*:f
+      - fn*:f(): Cannot unroll loop containing \`break\`]
+    `);
+  });
+
+  // Zero elements: the loop disappears from the generated code entirely.
+  it('unrolls correctly an empty loop', () => {
+    const arr: number[] = [];
+    const f = () => {
+      'use gpu';
+      let a = 0;
+      for (const foo of tgpu.unroll(arr)) {
+        a += 1;
+      }
+      return a;
+    };
+
+    expect(tgpu.resolve([f])).toMatchInlineSnapshot(`
+      "fn f() -> i32 {
+        var a = 0;
+        return a;
+      }"
+    `);
+  });
+});