Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions examples/python/compute-examples/compute-tiled-matmul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import namedisl as nisl

import loopy as lp
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2
from loopy.transform.compute import compute

import numpy as np
import numpy.linalg as la
import pyopencl as cl


def main(
M: int = 128,
N: int = 128,
K: int = 128,
bm: int = 32,
bn: int = 32,
bk: int = 16,
run_sequentially: bool = False,
use_precompute: bool = False,
use_compute: bool = False,
run_kernel: bool = False,
print_kernel: bool = False,
print_device_code: bool = False
) -> None:

knl = lp.make_kernel(
"{ [i, j, k] : 0 <= i < M and 0 <= j < N and 0 <= k < K }",
"""
a_(is, ks) := a[is, ks]
b_(ks, js) := b[ks, js]
c[i, j] = sum([k], a_(i, k) * b_(k, j))
""",
[
lp.GlobalArg("a", shape=(M, K), dtype=np.float64),
lp.GlobalArg("b", shape=(K, N), dtype=np.float64),
lp.GlobalArg("c", shape=(M, N), dtype=np.float64,
is_output=True)
]
)

# FIXME: without this, there are complaints about in-bounds access
# guarantees for the instruction that stores into c
knl = lp.fix_parameters(knl, M=M, N=N, K=K)

knl = lp.split_iname(knl, "i", bm, inner_iname="ii", outer_iname="io")
knl = lp.split_iname(knl, "j", bn, inner_iname="ji", outer_iname="jo")
knl = lp.split_iname(knl, "k", bk, inner_iname="ki", outer_iname="ko")

# FIXME: Given the input is already tiled, we shouldn't have to supply compute bounds here.
compute_map_a = nisl.make_map(f"""{{
[is, ks] -> [ii_s, io, ki_s, ko] :
is = io * {bm} + ii_s and
ks = ko * {bk} + ki_s
}}""")

compute_map_b = nisl.make_map(f"""{{
[ks, js] -> [ki_s, ko, ji_s, jo] :
js = jo * {bn} + ji_s and
ks = ko * {bk} + ki_s
}}""")

if use_compute:
knl = compute(
knl,
"a_",
compute_map=compute_map_a,
storage_indices=["ii_s", "ki_s"],
temporal_inames=["io", "ko", "jo"],
temporary_address_space=lp.AddressSpace.LOCAL
)

knl = compute(
knl,
"b_",
compute_map=compute_map_b,
storage_indices=["ki_s", "ji_s"],
temporal_inames=["io", "ko", "jo"],
temporary_address_space=lp.AddressSpace.LOCAL
)

if use_precompute:
knl = lp.precompute(
knl,
"a_",
sweep_inames=["ii", "ki"],
)

if not run_sequentially:
knl = lp.tag_inames(
knl, {
"io" : "g.0", # outer block loop over block rows
"jo" : "g.1", # outer block loop over block cols

"ii" : "l.0", # inner block loop over rows
"ji" : "l.1", # inner block loop over cols

"ii_s" : "l.0", # inner storage loop over a rows
"ji_s" : "l.0", # inner storage loop over b cols
"ki_s" : "l.1" # inner storage loop over a cols / b rows
}
)

knl = lp.add_inames_for_unused_hw_axes(knl)

if run_kernel:
a = np.random.randn(M, K)
b = np.random.randn(K, N)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

ex = knl.executor(ctx)
_, out = ex(queue, a=a, b=b)

print(20*"=", "Tiled matmul report", 20*"=")
print(f"Problem size: M = {M:-4}, N = {N:-4}, K = {K:-4}")
print(f"Tile size : BM = {bm:-4}, BN = {bn:-4}, BK = {bk:-4}")
print(f"Relative error = {la.norm((a @ b) - out) / la.norm(out)}")
print((40 + len(" Tiled matmul report "))*"=")

if print_device_code:
print(lp.generate_code_v2(knl).device_code())

if print_kernel:
print(knl)


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser()

_ = parser.add_argument("--precompute", action="store_true")
_ = parser.add_argument("--compute", action="store_true")
_ = parser.add_argument("--run-kernel", action="store_true")
_ = parser.add_argument("--print-kernel", action="store_true")
_ = parser.add_argument("--print-device-code", action="store_true")
_ = parser.add_argument("--run-sequentially", action="store_true")

_ = parser.add_argument("--m", action="store", type=int, default=128)
_ = parser.add_argument("--n", action="store", type=int, default=128)
_ = parser.add_argument("--k", action="store", type=int, default=128)

_ = parser.add_argument("--bm", action="store", type=int, default=32)
_ = parser.add_argument("--bn", action="store", type=int, default=32)
_ = parser.add_argument("--bk", action="store", type=int, default=16)

args = parser.parse_args()

main(
M=args.m,
N=args.n,
K=args.k,
bm=args.bm,
bn=args.bn,
bk=args.bk,
use_precompute=args.precompute,
use_compute=args.compute,
run_kernel=args.run_kernel,
print_kernel=args.print_kernel,
print_device_code=args.print_device_code,
run_sequentially=args.run_sequentially
)
132 changes: 132 additions & 0 deletions examples/python/compute-examples/finite-difference-2-5D.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import loopy as lp
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2
from loopy.transform.compute import compute

import namedisl as nisl

import numpy as np
import numpy.linalg as la

import pyopencl as cl


# FIXME: more complicated function, or better yet define a set of functions
# with sympy and get the exact laplacian symbolically
def f(x, y, z):
return x**2 + y**2 + z**2


def laplacian_f(x, y, z):
return 6 * np.ones_like(x)


def main(
use_compute: bool = False,
print_device_code: bool = False,
print_kernel: bool = False
) -> None:
npts = 64
pts = np.linspace(-1, 1, num=npts, endpoint=True)
h = pts[1] - pts[0]

x, y, z = np.meshgrid(*(pts,)*3)

dtype = np.float32
x = x.reshape(*(npts,)*3).astype(np.float32)
y = y.reshape(*(npts,)*3).astype(np.float32)
z = z.reshape(*(npts,)*3).astype(np.float32)

f_ = f(x, y, z)
lap_fd = np.zeros_like(f_)
c = (np.array([-1/12, 4/3, -5/2, 4/3, -1/12]) / h**2).astype(dtype)

m = 5
r = m // 2

bm = bn = m

# FIXME: the usage on the k dimension is incorrect since we are only testing
# tiling (i, j) planes
knl = lp.make_kernel(
"{ [i, j, k, l] : r <= i, j, k < npts - r and -r <= l < r + 1 }",
"""
u_(is, js, ks) := u[is, js, ks]

lap_u[i,j,k] = sum(
[l],
c[l+r] * (u_(i-l,j,k) + u_(i,j-l,k) + u[i,j,k-l])
)
""",
[
lp.GlobalArg("u", dtype=dtype, shape=(npts,npts,npts)),
lp.GlobalArg("lap_u", dtype=dtype, shape=(npts,npts,npts),
is_output=True),
lp.GlobalArg("c", dtype=dtype, shape=(m))
]
)

knl = lp.fix_parameters(knl, npts=npts, r=r)

knl = lp.split_iname(knl, "i", bm, inner_iname="ii", outer_iname="io")
knl = lp.split_iname(knl, "j", bn, inner_iname="ji", outer_iname="jo")

# FIXME: need to split k dimension

if use_compute:
compute_map = nisl.make_map(
f"""
{{
[is, js, ks] -> [io, ii_s, jo, ji_s, k_s] :
0 <= ii_s < {bm} and 0 <= ji_s < {bn} and 0 <= k_s < {npts} and
is = io * {bm} + ii_s and
js = jo * {bn} + ji_s and
ks = k_s
}}
"""
)

knl = compute(
knl,
"u_",
compute_map=compute_map,
storage_indices=["ii_s", "ji_s", "k_s"],
temporal_inames=["io", "jo"],
temporary_name="u_compute",
temporary_address_space=lp.AddressSpace.LOCAL,
temporary_dtype=np.float32
)

if print_device_code:
print(lp.generate_code_v2(knl).device_code())

if print_kernel:
print(knl)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

ex = knl.executor(queue)
_, lap_fd = ex(queue, u=f(x, y, z), c=c)

lap_true = laplacian_f(x, y, z)
sl = (slice(r, npts - r),)*3

print(la.norm(lap_true[sl] - lap_fd[0][sl]) / la.norm(lap_true[sl]))


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser()

_ = parser.add_argument("--compute", action="store_true")
_ = parser.add_argument("--print-device-code", action="store_true")
_ = parser.add_argument("--print-kernel", action="store_true")

args = parser.parse_args()

main(
use_compute=args.compute,
print_device_code=args.print_device_code,
print_kernel=args.print_kernel
)
40 changes: 28 additions & 12 deletions loopy/symbolic.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
from constantdict import constantdict
from typing_extensions import Self, override

import namedisl as nisl
import islpy as isl

Check failure on line 52 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Import "namedisl" could not be resolved (reportMissingImports)
import pymbolic.primitives as p
import pytools.lex
from pymbolic.mapper import (
Expand Down Expand Up @@ -2044,45 +2045,60 @@

# {{{ (pw)aff to expr conversion

def aff_to_expr(aff: isl.Aff) -> ArithmeticExpression:
def aff_to_expr(aff: isl.Aff | nisl.Aff) -> ArithmeticExpression:
from pymbolic import var

# FIXME: remove this once namedisl is the standard in loopy
denom = aff.get_denominator_val().to_python()

result = (aff.get_constant_val()*denom).to_python()
for dt in [isl.dim_type.in_, isl.dim_type.param]:
for i in range(aff.dim(dt)):
coeff = (aff.get_coefficient_val(dt, i)*denom).to_python()
if isinstance(aff, isl.Aff):
for dt in [isl.dim_type.in_, isl.dim_type.param]:
for i in range(aff.dim(dt)):
coeff = (aff.get_coefficient_val(dt, i)*denom).to_python()
if coeff:
dim_name = not_none(aff.get_dim_name(dt, i))
result += coeff*var(dim_name)

for i in range(aff.dim(isl.dim_type.div)):
coeff = (aff.get_coefficient_val(isl.dim_type.div, i)*denom).to_python()
if coeff:
result += coeff*aff_to_expr(aff.get_div(i))

else:
in_names = set(aff.dim_type_names(isl.dim_type.in_))
param_names = set(aff.dim_type_names(isl.dim_type.param))

for name in in_names | param_names:
coeff = (aff.get_coefficient_val(name) * denom).to_python()
if coeff:
dim_name = not_none(aff.get_dim_name(dt, i))
result += coeff*var(dim_name)
result = coeff * var(name)

for i in range(aff.dim(isl.dim_type.div)):
coeff = (aff.get_coefficient_val(isl.dim_type.div, i)*denom).to_python()
if coeff:
result += coeff*aff_to_expr(aff.get_div(i))
for name in aff.dim_type_names(isl.dim_type.div):
coeff = (aff.get_coefficient_val(name) * denom).to_python()
if coeff:
result += coeff * aff_to_expr(aff.get_div(name))

assert not isinstance(result, complex)

Check warning on line 2081 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "Aff" is unknown (reportUnknownMemberType)

Check warning on line 2081 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of parameter "aff" is partially unknown   Parameter type is "Aff | Unknown" (reportUnknownParameterType)
return flatten(result // denom)


def pw_aff_to_expr(

Check warning on line 2085 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)

Check warning on line 2085 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "get_denominator_val" is partially unknown   Type of "get_denominator_val" is "(() -> Val) | Unknown" (reportUnknownMemberType)
pw_aff: int | isl.PwAff | isl.Aff,
pw_aff: int | isl.PwAff | isl.Aff | nisl.PwAff | nisl.Aff,

Check warning on line 2086 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "get_constant_val" is partially unknown   Type of "get_constant_val" is "(() -> Val) | Unknown" (reportUnknownMemberType)

Check warning on line 2086 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)
int_ok: bool = False
) -> ArithmeticExpression:
if isinstance(pw_aff, int):
if not int_ok:

Check warning on line 2090 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)
warn(f"expected PwAff, got int: {pw_aff}", stacklevel=2)

return pw_aff

pieces = pw_aff.get_pieces()
last_expr = aff_to_expr(pieces[-1][1])

Check warning on line 2096 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)

pairs = [(set_to_cond_expr(constr_set), aff_to_expr(aff))
for constr_set, aff in pieces[:-1]]

from pymbolic.primitives import If

Check warning on line 2101 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Argument type is unknown   Argument corresponds to parameter "iterable" in function "__init__" (reportUnknownArgumentType)

Check warning on line 2101 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "dim_type_names" is unknown (reportUnknownMemberType)

expr = last_expr
for condition, then_expr in reversed(pairs):
Expand Down
Loading
Loading