Files
nvbench/scripts/test_cmp.json
Allison Vacanti 861f66c161 Update comparison script.
- Handle multiple benchmarks.
- Split output to resemble nvbench markdown output:
  - Headings for benchmarks / devices
  - Compact tables with axis values / stats
- Handle `null` stdev (these are infinite, e.g. too few samples)
- Format numbers, times, percentages similar to nvbench.
- Add summary of total/pass/unknown/failure stats
- Add new test.json files (with multiple benchmarks in each)
2021-06-22 16:15:22 -04:00

23046 lines
696 KiB
JSON

{
"devices": [
{
"id": 0,
"name": "NVIDIA GeForce GTX 1650",
"sm_version": 750,
"ptx_version": 750,
"sm_default_clock_rate": 1560000000,
"number_of_sms": 16,
"max_blocks_per_sm": 16,
"max_threads_per_sm": 1024,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 4294967296,
"global_memory_bus_peak_clock_rate": 4001000000,
"global_memory_bus_width": 128,
"global_memory_bus_bandwidth": 128032000000,
"l2_cache_size": 1048576,
"shared_memory_per_sm": 65536,
"shared_memory_per_block": 49152,
"ecc_state": false
}
],
"benchmarks": [
{
"index": 0,
"name": "cub::DeviceRadixSort::SortKeys - Overview",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0
],
"axes": {
"Key": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "bool",
"description": "",
"is_active": true
},
{
"input_string": "U8",
"description": "uint8_t",
"is_active": true
},
{
"input_string": "U16",
"description": "uint16_t",
"is_active": true
},
{
"input_string": "U32",
"description": "uint32_t",
"is_active": true
},
{
"input_string": "U64",
"description": "uint64_t",
"is_active": true
},
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
},
"Input": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "Rand",
"description": "Random values uniformly distributed across `T`'s value range",
"is_active": true
}
]
},
"Pattern": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "Ascend",
"description": "",
"is_active": true
}
]
},
"Elements": {
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "16",
"description": "2^16 = 65536",
"value": 65536
},
{
"input_string": "18",
"description": "2^18 = 262144",
"value": 262144
},
{
"input_string": "20",
"description": "2^20 = 1048576",
"value": 1048576
},
{
"input_string": "22",
"description": "2^22 = 4194304",
"value": 4194304
},
{
"input_string": "24",
"description": "2^24 = 16777216",
"value": 16777216
},
{
"input_string": "26",
"description": "2^26 = 67108864",
"value": 67108864
},
{
"input_string": "28",
"description": "2^28 = 268435456",
"value": 268435456
},
{
"input_string": "30",
"description": "2^30 = 1073741824",
"value": 1073741824
}
]
}
},
"states": {
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 65536
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 3042
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 3.927120315581862e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.4351576079801729
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 3.2775679267673803e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.4041140207967676
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1999531404.5142384
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 3999062809.0284767
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.031234869478165433
}
}
},
"is_skipped": false
},
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 262144
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1691
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 5.06085156712005e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.3900269997798082
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 4.116893645171086e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.15850062777398677
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 6367519362.747736
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 12735038725.495472
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.09946762313714909
}
}
},
"is_skipped": false
},
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 537
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00010505661080074492
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.13838288384658645
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 9.64652218021716e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.08737598421104262
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 10869990037.96822
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 21739980075.93644
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.16980114405723912
}
}
},
"is_skipped": false
},
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 141
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0003081985815602837
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.025128010564140792
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0003021013356269674
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.02018452044120483
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 13883765165.404953
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 27767530330.809906
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.2168796108067507
}
}
},
"is_skipped": false
},
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 34
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.001104020588235294
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.00868011645589697
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0010932272953145647
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.003434233699996367
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 15346503029.978348
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 30693006059.956696
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.23972917754902442
}
}
},
"is_skipped": false
},
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 8
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0042372500000000006
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0030855867536742037
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0042257159948349
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0019932191078502345
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 15881063488.892126
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 31762126977.784252
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.2480795971146608
}
}
},
"is_skipped": false
},
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.019142950000000002
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.019130672454833984
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 14031679055.388933
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 28063358110.777866
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.2191901876935287
}
}
},
"is_skipped": false
},
"Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "bool"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0667937
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.06678323364257813
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 16078014876.407965
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 32156029752.81593
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.2511561933955256
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 65536
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 3062
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 5.317815153494455e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.3713839348328813
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 4.7155929496967694e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.40429857529493773
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1389772202.54378
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 2779544405.08756
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.021709763223940578
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 262144
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1569
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 7.891446781389421e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.11657350278726154
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 7.26659224292061e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.11037642266139251
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3607523186.0627747
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 7215046372.125549
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.05635346141687664
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 505
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0001637732673267328
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04440230894077945
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00015781791602620992
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04411047078155534
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 6644213954.934341
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 13288427909.868683
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1037898955719561
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 129
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0005240697674418604
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01553926208912671
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0005178778284741924
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.015023407302414188
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 8099022142.650033
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16198044285.300066
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.12651559208088656
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 31
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0019522129032258073
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.23301756357696535
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.001942181168063994
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.23453440661836827
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 8638337285.8691
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 17276674571.7382
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.13494028502044958
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 8
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0072903875
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007239378588174434
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.007273227989673614
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007137742915442734
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 9226833545.611362
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 18453667091.222725
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.14413324083996754
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.029422500000000004
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.029412703514099123
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 9126514190.418577
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 18253028380.837154
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.14256614268961787
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.12713770000000002
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.12712566375732423
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 8446302597.481127
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16892605194.962254
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.131940492962402
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 131072
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2506
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 7.933663208300092e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.27652333688390907
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 7.329776243909458e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.2908651937565574
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 894106420.4307182
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 3576425681.7228727
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.027933842177915464
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 524288
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1269
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00014559330181245089
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.19873696733232882
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00013759964715081717
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.09333440846518179
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1905121164.3927765
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 7620484657.571106
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.05952015634818722
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 2097152
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 406
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00038108940886699564
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.03821569814913922
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00037408323064813464
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.029659979896029746
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2803055347.290609
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 11212221389.162436
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.08757358620627996
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 110
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0011471336363636359
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01637197025341408
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0011375141815705729
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.014295623169967478
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3687254249.6207814
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 14749016998.483126
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11519789582669275
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 28
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.004043317857142858
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01100346103239151
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.004035516560077667
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01078543812390853
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4157389952.4964676
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16629559809.98587
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.12988596452438353
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 7
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.015595228571428572
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005255757704044775
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.015586642401559011
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.00526989300271275
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4305536899.54981
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 17222147598.19924
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.13451439951105382
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0652593
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0652172622680664
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4116018469.107669
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16464073876.430676
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.12859342880241406
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 262144
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2245
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 8.775260579064617e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.2588213270041119
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 8.086750657362002e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.2692444298865416
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 810412027.9798346
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 6483296223.838676
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.05063809222568324
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1055
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00021251706161137438
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.06362320460336848
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00020548479910145425
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.06357499446836187
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1275734269.1347759
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10205874153.078207
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07971346345505972
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 328
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0006486628048780487
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.062073612578813026
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0006387051705543588
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.019862156160162552
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1641721483.3095798
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 13133771866.476639
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.10258194722004373
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 87
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0022856022988505752
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01813131000490793
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.002277155692549959
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.018170531150182676
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1841904799.800148
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 14735238398.401184
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11509027741815471
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 22
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.008837786363636363
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007500416221662679
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.008825995575297962
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007696619972960275
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1900886518.3385963
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 15207092146.70877
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11877571346779532
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 6
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.035352516666666674
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.006577426327632489
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.03533673604329427
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.006429928650474585
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1899124580.090781
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 15192996640.726248
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11866561985071113
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.1430682
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.14305706024169923
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1876422285.9498873
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 15011378287.599098
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11724708110159256
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 524288
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1413
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00018726730360934168
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.19158497176050596
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00017967336391272188
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.1929620201640373
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 364750782.04599524
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 5836012512.735924
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.045582452142713725
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 2097152
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 593
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0005387549747048903
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04111938777242051
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0005312656186282335
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04063673179425505
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 493433022.59399897
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 7894928361.5039835
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.0616637118962758
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 181
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0018093104972375699
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04001254538809503
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0017961900550357542
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01423183226589721
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 583777867.5258992
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9340445880.414387
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.0729539949420019
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 47
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00701123404255319
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.022882268388176443
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.006997501982019303
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.023591403635686216
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 599400187.492284
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9590402999.876545
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07490629686231992
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 12
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.027476083333333335
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.009319641724100018
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.02746358140309652
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.009143918373427004
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 610889590.6091974
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9774233449.747158
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07634211329782521
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 3
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.11105753333333335
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.11104602813720703
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 604333762.5464745
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9669340200.743591
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07552283960840721
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 65536
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 3087
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 5.5682150955620366e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.15974697309068234
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 4.976085047405057e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.1566862558828566
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1317019290.7811313
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 2634038581.5622625
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.020573283097680757
}
}
},
"is_skipped": false
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 262144
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1508
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 8.478799734748009e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.18692260194964838
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 7.815668975504499e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.07663177112866752
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3354082687.247878
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 6708165374.495756
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.052394443377403746
}
}
},
"is_skipped": false
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 493
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.000172661663286004
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.03967760002514821
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0001667072121922918
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.03756877297429747
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 6289925829.9065
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 12579851659.813
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.09825552721048644
}
}
},
"is_skipped": false
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 126
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.000547765079365079
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.02267930122532969
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0005414133327347893
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.020125269100866486
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 7746953660.734054
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 15493907321.468107
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.12101589697472591
}
}
},
"is_skipped": false
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 31
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0019378612903225812
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.011107088633221586
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0019301894287909237
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01047020541368632
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 8692004913.999191
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 17384009827.998383
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.13577863212320657
}
}
},
"is_skipped": false
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 8
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0075735125
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.008208621847337904
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.007558403968811036
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.006689815940438041
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 8878708293.036163
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 17757416586.072327
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.13869514329286683
}
}
},
"is_skipped": false
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.03057025
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.03056054401397705
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 8783726358.968918
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 17567452717.937836
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.13721142150351345
}
}
},
"is_skipped": false
},
"Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.1411317
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.14112380981445313
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 7608509332.420483
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 15217018664.840965
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11885324500781809
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 131072
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2444
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 8.023633387888718e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.15957149507307994
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 7.409614451997878e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.11997847029431298
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 884472470.5254984
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 3537889882.1019936
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.027632856489799375
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 524288
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1258
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00014721875993640703
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.05030707755378517
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00014142066840858286
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04965983055115567
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1853647016.026198
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 7414588064.104792
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.05791199125300543
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 2097152
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 395
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00038132582278481046
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.022492628088386854
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.000374839979561069
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.020997160180716862
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2797396374.922077
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 11189585499.688309
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.08739678751943505
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 107
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0011724710280373834
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.019342776294093793
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0011632810418850906
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.016095597179303357
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3605580980.846342
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 14422323923.385368
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11264624409042559
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 27
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00414218888888889
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.010143570260672414
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.004132891301755552
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01004138981574669
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4059437999.9477477
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16237751999.79099
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.12682573106560072
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 7
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.015818842857142856
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.004297912986520931
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.015806600979396273
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.004417201153941865
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4245622704.5571437
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16982490818.228575
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1326425488801907
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.06584135
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.06581772994995116
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4078467248.9331145
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16313868995.732458
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.12742024646754294
}
}
},
"is_skipped": false
},
"Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 262144
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2237
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 8.876477425122915e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.35106187182872767
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 8.160484933232342e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.2520628318430057
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 803089528.823398
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 6424716230.587184
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.05018055041385891
}
}
},
"is_skipped": false
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1033
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0002160535333978701
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.1188318605057773
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00020900739510142425
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.12132837233117388
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1254233133.1041677
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10033865064.833342
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07836997832442938
}
}
},
"is_skipped": false
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 324
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0006508543209876547
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.017018531103233366
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0006438862221476476
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01711700991339697
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1628511317.578021
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 13028090540.624168
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.10175651821907154
}
}
},
"is_skipped": false
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 84
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.002322446428571428
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0213207920280134
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.002313739804994491
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.02154045421700801
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1812781191.2757347
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 14502249530.205877
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1132705068280264
}
}
},
"is_skipped": false
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 21
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.008975690476190476
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.008053073918408916
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.008964469319298155
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007960830786781505
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1871523611.987053
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 14972188895.896423
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11694099050156542
}
}
},
"is_skipped": false
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 5
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.035879860000000006
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007059995549678311
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.03586985015869141
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0070565543246424054
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1870898922.1617713
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 14967191377.29417
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11690195714582426
}
}
},
"is_skipped": false
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.1458295
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.14581488037109375
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1840933211.458537
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 14727465691.668297
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.11502956832407756
}
}
},
"is_skipped": false
},
"Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 524288
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1442
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00018916165048543735
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.08474054695372502
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00018243015832403314
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.08522171353418057
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 359238848.4561566
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 5747821575.298506
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.04489363264885736
}
}
},
"is_skipped": false
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 2097152
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 588
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0005411481292517009
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.02452569809177557
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0005339167867185301
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.023483747081537745
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 490982876.9594332
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 7855726031.350931
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.061357520239869186
}
}
},
"is_skipped": false
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 182
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.001794480219780219
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01043393384097355
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.001785578369439303
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.009622529095864946
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 587247257.217429
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9395956115.478865
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.0733875602621131
}
}
},
"is_skipped": false
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 47
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.007055212765957448
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.022872858397093812
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.007045798778533936
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.02290392030641084
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 595291482.4616572
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9524663719.386515
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07439283709843254
}
}
},
"is_skipped": false
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 12
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.027424558333333335
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0056934303158599825
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.027411389350891106
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005639627987530259
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 612052741.4804167
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9792843863.686666
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07648747081734775
}
}
},
"is_skipped": false
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 3
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.11087960000000001
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.11086710357666014
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 605309075.7764491
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9684945212.423185
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07564472329123333
}
}
},
"is_skipped": false
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "I64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 262144
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2324
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 8.308123924268518e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.20123216459376678
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 7.645772117159609e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.2163426509558451
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 857153456.7832047
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 6857227654.265637
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.05355870137360689
}
}
},
"is_skipped": false
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1081
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00018608593894542115
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.061054319816048364
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00017904156912376654
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.058580426390462105
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1464151600.563705
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 11713212804.50964
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.09148660338438547
}
}
},
"is_skipped": false
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 361
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0004975271468144047
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.014379246069577772
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0004902403537091129
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.012272733824773483
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2138901851.0340319
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 17111214808.272255
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1336479537012017
}
}
},
"is_skipped": false
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 96
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.001751761458333334
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005879019262093316
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.001744055998822054
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005818847781913598
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2404913605.315918
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 19239308842.527344
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.15026953294900763
}
}
},
"is_skipped": false
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 24
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.006750754166666667
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.002444731344335298
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0067418733040491745
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0023841178656418723
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2488509534.8682375
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 19908076278.9459
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.15549297268609333
}
}
},
"is_skipped": false
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 6
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.026754200000000006
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0006428245182255163
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.026743643124898273
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0006056040012260031
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2509338899.2138395
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 20074711193.710716
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1567944825802199
}
}
},
"is_skipped": false
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.10674365000000001
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.10673254394531251
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2515029119.3053603
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 20120232954.442883
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1571500324484729
}
}
},
"is_skipped": false
},
"Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^16": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 65536
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 524288
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1446
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00017735048409405266
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.11011368701446163
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00017031459973024972
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.1112846952116118
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 384793788.1062353
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 6156700609.699765
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.048087201712851205
}
}
},
"is_skipped": false
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^18": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 262144
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 2097152
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 614
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0005115635179153098
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.014363302118375774
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.000504564481760081
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.013743392821332084
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 519545091.8097892
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 8312721468.956627
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.06492690475003614
}
}
},
"is_skipped": false
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 187
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0017560828877005348
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007271133357282276
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0017482294819571758
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007054498367435898
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 599793111.1572946
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9596689778.516714
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07495540004465066
}
}
},
"is_skipped": false
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 50
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0067117
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0020046252309587377
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.006701972465515136
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.001795830890697078
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 625831279.0125157
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10013300464.20025
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07820935753718017
}
}
},
"is_skipped": false
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 13
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.02650104615384615
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0007265940345306042
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.026490544979388894
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.00072958672729313
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 633328457.8725579
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10133255325.960926
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07914627066640313
}
}
},
"is_skipped": false
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 3
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.10620103333333335
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.10618826548258464
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 631980037.4835782
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10111680599.737251
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07897776024538593
}
}
},
"is_skipped": false
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "F64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
}
}
},
{
"index": 1,
"name": "cub::DeviceRadixSort::SortKeys - Constant Values",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0
],
"axes": {
"Key": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "U8",
"description": "uint8_t",
"is_active": true
},
{
"input_string": "U16",
"description": "uint16_t",
"is_active": true
},
{
"input_string": "U32",
"description": "uint32_t",
"is_active": true
},
{
"input_string": "U64",
"description": "uint64_t",
"is_active": true
}
]
},
"Input": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "Const",
"description": "All values = 42",
"is_active": true
}
]
},
"Pattern": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "Ascend",
"description": "",
"is_active": true
}
]
},
"Elements": {
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "20",
"description": "2^20 = 1048576",
"value": 1048576
},
{
"input_string": "22",
"description": "2^22 = 4194304",
"value": 4194304
},
{
"input_string": "24",
"description": "2^24 = 16777216",
"value": 16777216
},
{
"input_string": "26",
"description": "2^26 = 67108864",
"value": 67108864
},
{
"input_string": "28",
"description": "2^28 = 268435456",
"value": 268435456
},
{
"input_string": "30",
"description": "2^30 = 1073741824",
"value": 1073741824
}
]
}
},
"states": {
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2566
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 9.6390140296181e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.045598791758050095
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 9.036228858044716e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.033692751648895305
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 11604132835.419285
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 23208265670.83857
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1812692582388666
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1160
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0003043432758620684
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.014476575112824767
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00029814910513573545
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.011313650678305812
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 14067806771.013115
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 28135613542.02623
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.21975454216153953
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 363
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.001116191460055097
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.011463805762636945
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0011063706461063093
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.008802868537347153
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 15164191185.878504
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 30328382371.757008
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.23688126696261097
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 100
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.004274347
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0032816652112393967
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.004266441283226011
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0032308789304643024
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 15729470897.40716
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 31458941794.81432
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.24571155488326604
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 26
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.017081826923076922
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.010556997792740205
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.017073184013366702
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.010548301451256467
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 15722635906.099308
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 31445271812.198616
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.24560478483659254
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 7
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.06782757142857143
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.002248092939308053
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.06781701987130302
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.002251004801379183
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 15832925510.994875
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 31665851021.98975
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.24732762920199441
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 2097152
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1374
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00025053580786026195
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.06364830482823801
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0002446616114899394
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.06504028362673961
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4285821521.4654465
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 17143286085.861786
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.13389844793381175
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 484
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.000845314876033058
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0042877181599594
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0008389603308409695
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.004147125806742644
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4999406820.338754
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 19997627281.355015
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1561924150318281
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 138
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.003167936956521741
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.009915660724473297
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0031528834553732395
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.001304193155702654
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 5321229356.387328
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 21284917425.549313
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1662468556731857
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 37
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.012415567567567568
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0020929194181039177
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.012407164470569507
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0020887980997006564
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 5408880019.216801
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 21635520076.867203
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1689852542869533
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 10
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.04940072
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.000576076970718073
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.04938988418579101
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0005818752831350183
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 5435029063.648347
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 21740116254.593388
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.16980220768708906
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 804
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00047247549751243796
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.012916467373333794
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0004640141693291389
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005379896492658131
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2259793060.8800316
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 18078344487.040253
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.14120176586353608
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 251
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.001718760956175299
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.008648709803676304
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0017113251937813012
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.008654677513055743
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2450909982.0662203
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 19607279856.529762
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.15314358798214323
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 68
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.006638683823529413
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.002767845306863416
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0066234131490483005
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0015437824831612992
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2533016682.2540236
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 20264133458.03219
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.15827397414733962
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 18
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.02638116111111112
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0035695573142047353
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.02635982047186958
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0034495125905173608
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2545877126.576662
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 20367017012.613297
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.15907755102328555
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 5
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.10501874
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0005592643361183458
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.10500200805664064
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.00055006156023586
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 2556479261.3794527
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 20451834091.03562
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.15974001883150793
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^20": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 254
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0017053511811023635
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005201838970683627
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0016952332624300263
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0035478470341980604
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 618543785.8250388
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 9896700573.20062
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07729864856598835
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^22": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 71
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.006611254929577467
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.003663027154345941
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0066021115477655976
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0036695333152667172
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 635297354.4379919
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10164757671.00787
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.07939232122444287
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^24": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 19
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.026109178947368427
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0007116177577395647
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.026097561384502206
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0007337748011409093
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 642865275.9089972
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10285844414.543955
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.08033807496988217
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^26": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 5
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.10435436000000002
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0008827267294601751
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.10434051666259767
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0009079744888650902
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 643171666.6403677
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 10290746666.245884
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.08037636423898623
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^28": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^30": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Const"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
}
}
},
{
"index": 2,
"name": "cub::DeviceRadixSort::SortKeys - Half Word",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0
],
"axes": {
"Key": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "U8",
"description": "uint8_t",
"is_active": true
},
{
"input_string": "U16",
"description": "uint16_t",
"is_active": true
},
{
"input_string": "U32",
"description": "uint32_t",
"is_active": true
},
{
"input_string": "U64",
"description": "uint64_t",
"is_active": true
}
]
},
"Input": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "Rand",
"description": "Random values uniformly distributed across `T`'s value range",
"is_active": true
}
]
},
"Pattern": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "Ascend",
"description": "",
"is_active": true
}
]
},
"Elements": {
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "20",
"description": "2^20 = 1048576",
"value": 1048576
},
{
"input_string": "22",
"description": "2^22 = 4194304",
"value": 4194304
},
{
"input_string": "24",
"description": "2^24 = 16777216",
"value": 16777216
},
{
"input_string": "26",
"description": "2^26 = 67108864",
"value": 67108864
},
{
"input_string": "28",
"description": "2^28 = 268435456",
"value": 268435456
},
{
"input_string": "30",
"description": "2^30 = 1073741824",
"value": 1073741824
}
]
},
"Bits": {
"type": "string",
"flags": "",
"values": [
{
"input_string": "Half",
"description": "",
"value": "Half"
}
]
}
},
"states": {
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1048576
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 538
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 8.960966542750939e-05
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.08397008166379398
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 8.340330078477751e-05
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.045910043356146846
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 12572356131.394053
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 25144712262.788105
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.19639396606151668
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 131
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00028253587786259543
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04123287912799956
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0002716179535589143
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.037808745584459125
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 15441924751.451488
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 30883849502.902977
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.24121976929910474
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 33
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0009550272727272727
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007063558495008569
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0009488116340203719
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007360718544290043
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 17682346419.92151
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 35364692839.84302
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.27621760840917126
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 8
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.003773775
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.014705369350832631
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0037657760083675386
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.014874000832241253
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 17820726418.90659
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 35641452837.81318
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.2783792554815451
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.014729750000000002
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.01471895980834961
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 18237393096.74077
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 36474786193.48154
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.2848880451252932
}
}
},
"is_skipped": false
},
"Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U8"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 1
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.06453160000000001
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.06452188873291016
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 16641512594.97035
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 33283025189.9407
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.2599586446352529
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 2097152
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 450
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00025646377777777785
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.021618265944340534
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.00025038620548115814
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.022009562632867193
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 4187834541.3837366
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 16751338165.534946
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.13083712013820722
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 116
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0007816620689655175
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.020853227399617164
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0007727365504051081
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.016658278118279097
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 5427857654.463388
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 21711430617.853554
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.16957815716269023
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 29
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.002729172413793104
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007205020379178172
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.002722046876775808
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.007340614282676166
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 6163455943.077721
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 24653823772.310883
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.19255985825661462
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 7
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0105532
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005970051554930724
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.010544246673583986
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.005939659771636435
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 6364500573.390865
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 25458002293.56346
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1988409326852932
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0433476
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.04333521652221679
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 6194395171.935518
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 24777580687.742073
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1935264675061084
}
}
},
"is_skipped": false
},
"Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U16"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 4194304
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 403
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.00035955682382134016
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04131883733262655
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.000348795553294956
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.031411058442745776
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3006276857.874047
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 24050214862.992374
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.18784534228155753
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 16777216
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 103
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0012605281553398058
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0430687589837951
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0012512904081529784
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.03898592591253357
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3351982859.191884
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 26815862873.535072
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.20944656705772832
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 67108864
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 26
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.004762250000000001
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.004982744499450167
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.004748374150349544
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0038811646473003495
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3533254850.771389
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 28266038806.171112
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.22077323486449568
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 268435456
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 7
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.01901851428571429
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.010872125017202998
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.019009632383074078
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.01087158253950949
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3530255748.646293
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 28242045989.170345
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.22058583783093558
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 1073741824
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 2
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.07661000000000001
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.07659774398803712
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": null
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 3504482534.6543326
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 28035860277.23466
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.21897541456225522
}
}
},
"is_skipped": false
},
"Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U32"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1048576
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 8388608
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 270
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0009441937037037035
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.08572346375827458
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0009308705164326564
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.019541093452539494
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1126446677.0506625
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 18023146832.8106
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1407706419708401
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 4194304
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 33554432
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 68
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.0036803838235294115
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04345566028172002
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.0036717261111035055
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.04355542870677633
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1142324855.6901317
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 18277197691.042107
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.14275491823170852
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 16777216
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 134217728
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 18
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.01408933888888889
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0017506825580184378
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.014078581280178495
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0016365268643609497
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1191683712.024376
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 19066939392.390015
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.1489232331947483
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 67108864
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": {
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "Size"
},
"value": {
"type": "int64",
"value": 536870912
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": 5
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": 0.05700572
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.0126927638236131
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": 0.05699455947875977
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": 0.012703536689392532
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": 1177460877.2089825
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": 18839374035.34372
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": 0.147145823195324
}
}
},
"is_skipped": false
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 268435456
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
},
"Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Key": {
"type": "string",
"value": "U64"
},
"Input": {
"type": "string",
"value": "Rand"
},
"Pattern": {
"type": "string",
"value": "Ascend"
},
"Elements": {
"type": "int64",
"value": 1073741824
},
"Bits": {
"type": "string",
"value": "Half"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory"
}
}
}
]
}