Mirror of https://github.com/NVIDIA/nvbench.git, synced 2026-04-20 06:48:53 +00:00
Change tests and examples from camelCase to snake_case to match the changed implementation
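The rename is mechanical: every camelCase accessor on the benchmark state, the launch object, and the registration builder gets a snake_case spelling. A minimal sketch of the pattern, using only method names that appear in the hunks below; the benchmark body is truncated to the renamed calls, and the 8-byte element size (np.random.random yields float64) is an illustrative assumption:

import nvbench  # the Python bindings whose accessors this commit renames


def add_two(state):
    # old spelling: state.getInt64("elements")
    N = state.get_int64("elements")
    # old spellings: state.addGlobalMemoryReads / state.addGlobalMemoryWrites
    state.add_global_memory_reads(N * 8)  # assumes 8-byte (float64) elements
    state.add_global_memory_writes(N * 8)


# old spelling: nvbench.register(add_two).addInt64Axis(...)
nvbench.register(add_two).add_int64_axis("elements", [2**20])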
@@ -14,18 +14,18 @@ def kernel(a, b, c):
     c[tid] = a[tid] + b[tid]
 
 
-def getNumbaStream(launch):
-    return cuda.external_stream(launch.getStream().addressof())
+def get_numba_stream(launch):
+    return cuda.external_stream(launch.get_stream().addressof())
 
 
 def add_two(state):
     # state.skip("Skipping this benchmark for no reason")
-    N = state.getInt64("elements")
+    N = state.get_int64("elements")
     a = cuda.to_device(np.random.random(N))
     c = cuda.device_array_like(a)
 
-    state.addGlobalMemoryReads(a.nbytes)
-    state.addGlobalMemoryWrites(c.nbytes)
+    state.add_global_memory_reads(a.nbytes)
+    state.add_global_memory_writes(c.nbytes)
 
     nthreads = 256
     nblocks = (len(a) + nthreads - 1) // nthreads
@@ -35,22 +35,22 @@ def add_two(state):
     cuda.synchronize()
 
     def kernel_launcher(launch):
-        stream = getNumbaStream(launch)
+        stream = get_numba_stream(launch)
         kernel[nblocks, nthreads, stream](a, a, c)
 
     state.exec(kernel_launcher, batched=True, sync=True)
 
 
 def add_float(state):
-    N = state.getInt64("elements")
-    v = state.getFloat64("v")
-    name = state.getString("name")
+    N = state.get_int64("elements")
+    v = state.get_float64("v")
+    name = state.get_string("name")
     a = cuda.to_device(np.random.random(N).astype(np.float32))
     b = cuda.to_device(np.random.random(N).astype(np.float32))
     c = cuda.device_array_like(a)
 
-    state.addGlobalMemoryReads(a.nbytes + b.nbytes)
-    state.addGlobalMemoryWrites(c.nbytes)
+    state.add_global_memory_reads(a.nbytes + b.nbytes)
+    state.add_global_memory_writes(c.nbytes)
 
     nthreads = 64
     nblocks = (len(a) + nthreads - 1) // nthreads
@@ -58,26 +58,26 @@ def add_float(state):
     def kernel_launcher(launch):
         _ = v
         _ = name
-        stream = getNumbaStream(launch)
+        stream = get_numba_stream(launch)
         kernel[nblocks, nthreads, stream](a, b, c)
 
     state.exec(kernel_launcher, batched=True, sync=True)
 
 
 def add_three(state):
-    N = state.getInt64("elements")
+    N = state.get_int64("elements")
     a = cuda.to_device(np.random.random(N).astype(np.float32))
     b = cuda.to_device(np.random.random(N).astype(np.float32))
     c = cuda.device_array_like(a)
 
-    state.addGlobalMemoryReads(a.nbytes + b.nbytes)
-    state.addGlobalMemoryWrites(c.nbytes)
+    state.add_global_memory_reads(a.nbytes + b.nbytes)
+    state.add_global_memory_writes(c.nbytes)
 
     nthreads = 256
     nblocks = (len(a) + nthreads - 1) // nthreads
 
     def kernel_launcher(launch):
-        stream = getNumbaStream(launch)
+        stream = get_numba_stream(launch)
         kernel[nblocks, nthreads, stream](a, b, c)
 
     state.exec(kernel_launcher, batched=True, sync=True)
@@ -86,18 +86,18 @@ def add_three(state):
 
 def register_benchmarks():
     (
-        nvbench.register(add_two).addInt64Axis(
+        nvbench.register(add_two).add_int64_axis(
             "elements", [2**pow2 for pow2 in range(20, 23)]
         )
     )
     (
         nvbench.register(add_float)
-        .addFloat64Axis("v", [0.1, 0.3])
-        .addStringAxis("name", ["Anne", "Lynda"])
-        .addInt64Axis("elements", [2**pow2 for pow2 in range(20, 23)])
+        .add_float64_axis("v", [0.1, 0.3])
+        .add_string_axis("name", ["Anne", "Lynda"])
+        .add_int64_axis("elements", [2**pow2 for pow2 in range(20, 23)])
     )
     (
-        nvbench.register(add_three).addInt64Axis(
+        nvbench.register(add_three).add_int64_axis(
             "elements", [2**pow2 for pow2 in range(20, 22)]
         )
     )
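To see how the renamed pieces fit together, here is the add_two benchmark assembled as one self-contained file after this commit. The hunks start at line 14 of the source file, so the imports, the @cuda.jit kernel header, and the bounds check are reconstructed assumptions rather than verbatim lines; everything else is copied from the diff. The launcher wraps launch.get_stream() because nvbench times the work submitted on the stream it hands to the launcher, so the numba kernel has to be launched on that same stream to be measured.

import numpy as np
from numba import cuda

import nvbench


@cuda.jit  # assumed decorator; the diff only shows the kernel body
def kernel(a, b, c):
    tid = cuda.grid(1)  # assumed thread-index computation
    if tid < c.shape[0]:  # assumed bounds check
        c[tid] = a[tid] + b[tid]


def get_numba_stream(launch):
    # addressof() exposes nvbench's raw CUDA stream handle; numba wraps
    # it without taking ownership.
    return cuda.external_stream(launch.get_stream().addressof())


def add_two(state):
    N = state.get_int64("elements")
    a = cuda.to_device(np.random.random(N))
    c = cuda.device_array_like(a)

    state.add_global_memory_reads(a.nbytes)
    state.add_global_memory_writes(c.nbytes)

    nthreads = 256
    nblocks = (len(a) + nthreads - 1) // nthreads

    # (three lines between the hunks are not visible in this diff)
    cuda.synchronize()

    def kernel_launcher(launch):
        # Launch on nvbench's stream so the kernel stays inside the
        # timed region.
        stream = get_numba_stream(launch)
        kernel[nblocks, nthreads, stream](a, a, c)

    state.exec(kernel_launcher, batched=True, sync=True)


def register_benchmarks():
    nvbench.register(add_two).add_int64_axis(
        "elements", [2**pow2 for pow2 in range(20, 23)]
    )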