{ "devices": [ { "id": 0, "name": "NVIDIA GeForce GTX 1650", "sm_version": 750, "ptx_version": 750, "sm_default_clock_rate": 1560000000, "number_of_sms": 16, "max_blocks_per_sm": 16, "max_threads_per_sm": 1024, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, "global_memory_size": 4294967296, "global_memory_bus_peak_clock_rate": 4001000000, "global_memory_bus_width": 128, "global_memory_bus_bandwidth": 128032000000, "l2_cache_size": 1048576, "shared_memory_per_sm": 65536, "shared_memory_per_block": 49152, "ecc_state": false } ], "benchmarks": [ { "index": 0, "name": "cub::DeviceRadixSort::SortKeys - Overview", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0 ], "axes": { "Key": { "type": "type", "flags": "", "values": [ { "input_string": "bool", "description": "", "is_active": true }, { "input_string": "U8", "description": "uint8_t", "is_active": true }, { "input_string": "U16", "description": "uint16_t", "is_active": true }, { "input_string": "U32", "description": "uint32_t", "is_active": true }, { "input_string": "U64", "description": "uint64_t", "is_active": true }, { "input_string": "I8", "description": "int8_t", "is_active": true }, { "input_string": "I16", "description": "int16_t", "is_active": true }, { "input_string": "I32", "description": "int32_t", "is_active": true }, { "input_string": "I64", "description": "int64_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] }, "Input": { "type": "type", "flags": "", "values": [ { "input_string": "Rand", "description": "Random values uniformly distributed across `T`'s value range", "is_active": true } ] }, "Pattern": { "type": "type", "flags": "", "values": [ { "input_string": "Ascend", "description": "", "is_active": true } ] }, "Elements": { "type": "int64", "flags": "pow2", "values": [ { "input_string": "16", "description": "2^16 = 65536", "value": 65536 }, { "input_string": "18", "description": "2^18 = 262144", "value": 262144 }, { "input_string": "20", "description": "2^20 = 1048576", "value": 1048576 }, { "input_string": "22", "description": "2^22 = 4194304", "value": 4194304 }, { "input_string": "24", "description": "2^24 = 16777216", "value": 16777216 }, { "input_string": "26", "description": "2^26 = 67108864", "value": 67108864 }, { "input_string": "28", "description": "2^28 = 268435456", "value": 268435456 }, { "input_string": "30", "description": "2^30 = 1073741824", "value": 1073741824 } ] } }, "states": { "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 65536 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 3042 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 3.927120315581862e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.4351576079801729 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 3.2775679267673803e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.4041140207967676 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1999531404.5142384 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 3999062809.0284767 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.031234869478165433 } } }, "is_skipped": false }, "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 262144 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1691 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 5.06085156712005e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.3900269997798082 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 4.116893645171086e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.15850062777398677 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 6367519362.747736 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 12735038725.495472 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.09946762313714909 } } }, "is_skipped": false }, "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 537 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00010505661080074492 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.13838288384658645 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 9.64652218021716e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.08737598421104262 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 10869990037.96822 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 21739980075.93644 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.16980114405723912 } } }, "is_skipped": false }, "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 141 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0003081985815602837 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.025128010564140792 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0003021013356269674 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.02018452044120483 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 13883765165.404953 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 27767530330.809906 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.2168796108067507 } } }, "is_skipped": false }, "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 34 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.001104020588235294 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.00868011645589697 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0010932272953145647 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.003434233699996367 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 15346503029.978348 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 30693006059.956696 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.23972917754902442 } } }, "is_skipped": false }, "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 8 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0042372500000000006 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0030855867536742037 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0042257159948349 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0019932191078502345 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 15881063488.892126 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 31762126977.784252 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.2480795971146608 } } }, "is_skipped": false }, "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.019142950000000002 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.019130672454833984 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 14031679055.388933 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 28063358110.777866 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.2191901876935287 } } }, "is_skipped": false }, "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "bool" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0667937 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.06678323364257813 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 16078014876.407965 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 32156029752.81593 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.2511561933955256 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 65536 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 3062 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 5.317815153494455e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.3713839348328813 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 4.7155929496967694e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.40429857529493773 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1389772202.54378 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 2779544405.08756 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.021709763223940578 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 262144 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1569 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 7.891446781389421e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.11657350278726154 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 7.26659224292061e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.11037642266139251 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3607523186.0627747 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 7215046372.125549 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.05635346141687664 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 505 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0001637732673267328 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.04440230894077945 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00015781791602620992 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.04411047078155534 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 6644213954.934341 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 13288427909.868683 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1037898955719561 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 129 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0005240697674418604 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.01553926208912671 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0005178778284741924 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.015023407302414188 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 8099022142.650033 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16198044285.300066 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.12651559208088656 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 31 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0019522129032258073 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.23301756357696535 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.001942181168063994 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.23453440661836827 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 8638337285.8691 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 17276674571.7382 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.13494028502044958 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 8 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0072903875 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.007239378588174434 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.007273227989673614 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.007137742915442734 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 9226833545.611362 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 18453667091.222725 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.14413324083996754 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.029422500000000004 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.029412703514099123 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 9126514190.418577 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 18253028380.837154 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.14256614268961787 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.12713770000000002 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.12712566375732423 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 8446302597.481127 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16892605194.962254 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.131940492962402 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 131072 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2506 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 7.933663208300092e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.27652333688390907 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 7.329776243909458e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.2908651937565574 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 894106420.4307182 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 3576425681.7228727 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.027933842177915464 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 524288 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1269 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00014559330181245089 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.19873696733232882 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00013759964715081717 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.09333440846518179 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1905121164.3927765 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 7620484657.571106 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.05952015634818722 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 2097152 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 406 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00038108940886699564 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.03821569814913922 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00037408323064813464 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.029659979896029746 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2803055347.290609 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 11212221389.162436 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.08757358620627996 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 110 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0011471336363636359 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.01637197025341408 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0011375141815705729 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.014295623169967478 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3687254249.6207814 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 14749016998.483126 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11519789582669275 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 28 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.004043317857142858 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.01100346103239151 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.004035516560077667 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.01078543812390853 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4157389952.4964676 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16629559809.98587 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.12988596452438353 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 7 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.015595228571428572 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.005255757704044775 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.015586642401559011 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.00526989300271275 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4305536899.54981 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 17222147598.19924 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.13451439951105382 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0652593 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0652172622680664 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4116018469.107669 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16464073876.430676 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.12859342880241406 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 262144 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2245 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 8.775260579064617e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.2588213270041119 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 8.086750657362002e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.2692444298865416 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 810412027.9798346 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 6483296223.838676 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.05063809222568324 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1055 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00021251706161137438 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.06362320460336848 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00020548479910145425 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.06357499446836187 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1275734269.1347759 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10205874153.078207 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07971346345505972 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 328 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0006486628048780487 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.062073612578813026 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0006387051705543588 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.019862156160162552 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1641721483.3095798 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 13133771866.476639 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.10258194722004373 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 87 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0022856022988505752 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.01813131000490793 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.002277155692549959 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.018170531150182676 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1841904799.800148 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 14735238398.401184 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11509027741815471 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 22 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.008837786363636363 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.007500416221662679 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.008825995575297962 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.007696619972960275 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1900886518.3385963 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 15207092146.70877 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11877571346779532 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 6 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.035352516666666674 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.006577426327632489 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.03533673604329427 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.006429928650474585 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1899124580.090781 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 15192996640.726248 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11866561985071113 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.1430682 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.14305706024169923 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1876422285.9498873 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 15011378287.599098 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11724708110159256 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 524288 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1413 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00018726730360934168 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.19158497176050596 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00017967336391272188 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.1929620201640373 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 364750782.04599524 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 5836012512.735924 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.045582452142713725 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 2097152 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 593 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0005387549747048903 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.04111938777242051 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0005312656186282335 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.04063673179425505 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 493433022.59399897 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 7894928361.5039835 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.0616637118962758 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 181 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0018093104972375699 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.04001254538809503 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0017961900550357542 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.01423183226589721 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 583777867.5258992 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9340445880.414387 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.0729539949420019 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 47 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00701123404255319 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.022882268388176443 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.006997501982019303 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.023591403635686216 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 599400187.492284 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9590402999.876545 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07490629686231992 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 12 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.027476083333333335 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.009319641724100018 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.02746358140309652 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.009143918373427004 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 610889590.6091974 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9774233449.747158 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07634211329782521 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 3 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.11105753333333335 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.11104602813720703 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 604333762.5464745 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9669340200.743591 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07552283960840721 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 65536 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 3087 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 5.5682150955620366e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.15974697309068234 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 4.976085047405057e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.1566862558828566 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1317019290.7811313 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 2634038581.5622625 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.020573283097680757 } } }, "is_skipped": false }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 262144 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1508 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 8.478799734748009e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.18692260194964838 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 7.815668975504499e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.07663177112866752 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3354082687.247878 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 6708165374.495756 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.052394443377403746 } } }, "is_skipped": false }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 493 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.000172661663286004 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.03967760002514821 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0001667072121922918 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.03756877297429747 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 6289925829.9065 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 12579851659.813 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.09825552721048644 } } }, "is_skipped": false }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 126 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.000547765079365079 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.02267930122532969 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0005414133327347893 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.020125269100866486 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 7746953660.734054 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 15493907321.468107 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.12101589697472591 } } }, "is_skipped": false }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 31 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0019378612903225812 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.011107088633221586 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0019301894287909237 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.01047020541368632 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 8692004913.999191 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 17384009827.998383 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.13577863212320657 } } }, "is_skipped": false }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 8 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0075735125 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.008208621847337904 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.007558403968811036 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.006689815940438041 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 8878708293.036163 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 17757416586.072327 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.13869514329286683 } } }, "is_skipped": false }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.03057025 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.03056054401397705 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 8783726358.968918 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 17567452717.937836 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.13721142150351345 } } }, "is_skipped": false }, "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.1411317 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.14112380981445313 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 7608509332.420483 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 15217018664.840965 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11885324500781809 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 131072 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2444 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 8.023633387888718e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.15957149507307994 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 7.409614451997878e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.11997847029431298 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 884472470.5254984 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 3537889882.1019936 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.027632856489799375 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 524288 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1258 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00014721875993640703 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.05030707755378517 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00014142066840858286 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.04965983055115567 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1853647016.026198 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 7414588064.104792 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.05791199125300543 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 2097152 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 395 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00038132582278481046 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.022492628088386854 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.000374839979561069 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.020997160180716862 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2797396374.922077 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 11189585499.688309 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.08739678751943505 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 107 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0011724710280373834 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.019342776294093793 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0011632810418850906 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.016095597179303357 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3605580980.846342 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 14422323923.385368 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11264624409042559 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 27 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00414218888888889 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.010143570260672414 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.004132891301755552 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.01004138981574669 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4059437999.9477477 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16237751999.79099 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.12682573106560072 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 7 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.015818842857142856 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.004297912986520931 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.015806600979396273 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.004417201153941865 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4245622704.5571437 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16982490818.228575 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1326425488801907 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.06584135 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.06581772994995116 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4078467248.9331145 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16313868995.732458 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.12742024646754294 } } }, "is_skipped": false }, "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 262144 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2237 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 8.876477425122915e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.35106187182872767 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 8.160484933232342e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.2520628318430057 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 803089528.823398 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 6424716230.587184 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.05018055041385891 } } }, "is_skipped": false }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1033 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0002160535333978701 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.1188318605057773 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00020900739510142425 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.12132837233117388 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1254233133.1041677 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10033865064.833342 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07836997832442938 } } }, "is_skipped": false }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 324 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0006508543209876547 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.017018531103233366 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0006438862221476476 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.01711700991339697 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1628511317.578021 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 13028090540.624168 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.10175651821907154 } } }, "is_skipped": false }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 84 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.002322446428571428 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0213207920280134 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.002313739804994491 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.02154045421700801 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1812781191.2757347 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 14502249530.205877 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1132705068280264 } } }, "is_skipped": false }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 21 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.008975690476190476 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.008053073918408916 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.008964469319298155 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.007960830786781505 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1871523611.987053 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 14972188895.896423 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11694099050156542 } } }, "is_skipped": false }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 5 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.035879860000000006 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.007059995549678311 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.03586985015869141 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0070565543246424054 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1870898922.1617713 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 14967191377.29417 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11690195714582426 } } }, "is_skipped": false }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.1458295 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.14581488037109375 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1840933211.458537 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 14727465691.668297 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.11502956832407756 } } }, "is_skipped": false }, "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 524288 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1442 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00018916165048543735 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.08474054695372502 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00018243015832403314 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.08522171353418057 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 359238848.4561566 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 5747821575.298506 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.04489363264885736 } } }, "is_skipped": false }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 2097152 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 588 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0005411481292517009 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.02452569809177557 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0005339167867185301 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.023483747081537745 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 490982876.9594332 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 7855726031.350931 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.061357520239869186 } } }, "is_skipped": false }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 182 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.001794480219780219 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.01043393384097355 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.001785578369439303 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.009622529095864946 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 587247257.217429 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9395956115.478865 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.0733875602621131 } } }, "is_skipped": false }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 47 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.007055212765957448 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.022872858397093812 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.007045798778533936 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.02290392030641084 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 595291482.4616572 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9524663719.386515 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07439283709843254 } } }, "is_skipped": false }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 12 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.027424558333333335 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0056934303158599825 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.027411389350891106 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.005639627987530259 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 612052741.4804167 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9792843863.686666 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07648747081734775 } } }, "is_skipped": false }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 3 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.11087960000000001 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.11086710357666014 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 605309075.7764491 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9684945212.423185 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07564472329123333 } } }, "is_skipped": false }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "I64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 262144 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2324 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 8.308123924268518e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.20123216459376678 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 7.645772117159609e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.2163426509558451 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 857153456.7832047 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 6857227654.265637 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.05355870137360689 } } }, "is_skipped": false }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1081 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00018608593894542115 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.061054319816048364 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00017904156912376654 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.058580426390462105 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1464151600.563705 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 11713212804.50964 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.09148660338438547 } } }, "is_skipped": false }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 361 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0004975271468144047 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.014379246069577772 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0004902403537091129 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.012272733824773483 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2138901851.0340319 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 17111214808.272255 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1336479537012017 } } }, "is_skipped": false }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 96 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.001751761458333334 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.005879019262093316 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.001744055998822054 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.005818847781913598 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2404913605.315918 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 19239308842.527344 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.15026953294900763 } } }, "is_skipped": false }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 24 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.006750754166666667 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.002444731344335298 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0067418733040491745 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0023841178656418723 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2488509534.8682375 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 19908076278.9459 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.15549297268609333 } } }, "is_skipped": false }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 6 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.026754200000000006 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0006428245182255163 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.026743643124898273 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0006056040012260031 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2509338899.2138395 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 20074711193.710716 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1567944825802199 } } }, "is_skipped": false }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.10674365000000001 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.10673254394531251 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2515029119.3053603 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 20120232954.442883 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1571500324484729 } } }, "is_skipped": false }, "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^16": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 65536 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 524288 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1446 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00017735048409405266 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.11011368701446163 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00017031459973024972 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.1112846952116118 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 384793788.1062353 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 6156700609.699765 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.048087201712851205 } } }, "is_skipped": false }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^18": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 262144 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 2097152 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 614 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0005115635179153098 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.014363302118375774 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.000504564481760081 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.013743392821332084 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 519545091.8097892 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 8312721468.956627 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.06492690475003614 } } }, "is_skipped": false }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 187 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0017560828877005348 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.007271133357282276 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0017482294819571758 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.007054498367435898 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 599793111.1572946 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9596689778.516714 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07495540004465066 } } }, "is_skipped": false }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 50 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0067117 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0020046252309587377 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.006701972465515136 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.001795830890697078 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 625831279.0125157 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10013300464.20025 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07820935753718017 } } }, "is_skipped": false }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 13 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.02650104615384615 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0007265940345306042 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.026490544979388894 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.00072958672729313 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 633328457.8725579 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10133255325.960926 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07914627066640313 } } }, "is_skipped": false }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 3 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.10620103333333335 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.10618826548258464 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 631980037.4835782 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10111680599.737251 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07897776024538593 } } }, "is_skipped": false }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "F64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" } } }, { "index": 1, "name": "cub::DeviceRadixSort::SortKeys - Constant Values", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0 ], "axes": { "Key": { "type": "type", "flags": "", "values": [ { "input_string": "U8", "description": "uint8_t", "is_active": true }, { "input_string": "U16", "description": "uint16_t", "is_active": true }, { "input_string": "U32", "description": "uint32_t", "is_active": true }, { "input_string": "U64", "description": "uint64_t", "is_active": true } ] }, "Input": { "type": "type", "flags": "", "values": [ { "input_string": "Const", "description": "All values = 42", "is_active": true } ] }, "Pattern": { "type": "type", "flags": "", "values": [ { "input_string": "Ascend", "description": "", "is_active": true } ] }, "Elements": { "type": "int64", "flags": "pow2", "values": [ { "input_string": "20", "description": "2^20 = 1048576", "value": 1048576 }, { "input_string": "22", "description": "2^22 = 4194304", "value": 4194304 }, { "input_string": "24", "description": "2^24 = 16777216", "value": 16777216 }, { "input_string": "26", "description": "2^26 = 67108864", "value": 67108864 }, { "input_string": "28", "description": "2^28 = 268435456", "value": 268435456 }, { "input_string": "30", "description": "2^30 = 1073741824", "value": 1073741824 } ] } }, "states": { "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2566 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 9.6390140296181e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.045598791758050095 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 9.036228858044716e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.033692751648895305 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 11604132835.419285 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 23208265670.83857 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1812692582388666 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1160 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0003043432758620684 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.014476575112824767 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00029814910513573545 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.011313650678305812 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 14067806771.013115 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 28135613542.02623 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.21975454216153953 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 363 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.001116191460055097 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.011463805762636945 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0011063706461063093 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.008802868537347153 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 15164191185.878504 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 30328382371.757008 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.23688126696261097 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 100 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.004274347 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0032816652112393967 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.004266441283226011 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0032308789304643024 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 15729470897.40716 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 31458941794.81432 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.24571155488326604 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 26 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.017081826923076922 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.010556997792740205 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.017073184013366702 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.010548301451256467 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 15722635906.099308 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 31445271812.198616 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.24560478483659254 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 7 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.06782757142857143 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.002248092939308053 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.06781701987130302 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.002251004801379183 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 15832925510.994875 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 31665851021.98975 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.24732762920199441 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 2097152 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1374 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00025053580786026195 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.06364830482823801 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0002446616114899394 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.06504028362673961 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4285821521.4654465 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 17143286085.861786 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.13389844793381175 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 484 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.000845314876033058 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0042877181599594 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0008389603308409695 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.004147125806742644 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4999406820.338754 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 19997627281.355015 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1561924150318281 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 138 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.003167936956521741 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.009915660724473297 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0031528834553732395 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.001304193155702654 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 5321229356.387328 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 21284917425.549313 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1662468556731857 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 37 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.012415567567567568 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0020929194181039177 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.012407164470569507 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0020887980997006564 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 5408880019.216801 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 21635520076.867203 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1689852542869533 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 10 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.04940072 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.000576076970718073 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.04938988418579101 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0005818752831350183 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 5435029063.648347 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 21740116254.593388 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.16980220768708906 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 804 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00047247549751243796 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.012916467373333794 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0004640141693291389 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.005379896492658131 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2259793060.8800316 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 18078344487.040253 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.14120176586353608 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 251 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.001718760956175299 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.008648709803676304 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0017113251937813012 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.008654677513055743 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2450909982.0662203 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 19607279856.529762 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.15314358798214323 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 68 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.006638683823529413 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.002767845306863416 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0066234131490483005 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0015437824831612992 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2533016682.2540236 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 20264133458.03219 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.15827397414733962 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 18 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.02638116111111112 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0035695573142047353 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.02635982047186958 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0034495125905173608 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2545877126.576662 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 20367017012.613297 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.15907755102328555 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 5 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.10501874 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0005592643361183458 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.10500200805664064 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.00055006156023586 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 2556479261.3794527 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 20451834091.03562 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.15974001883150793 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^20": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 254 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0017053511811023635 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.005201838970683627 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0016952332624300263 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0035478470341980604 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 618543785.8250388 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 9896700573.20062 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07729864856598835 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^22": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 71 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.006611254929577467 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.003663027154345941 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0066021115477655976 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0036695333152667172 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 635297354.4379919 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10164757671.00787 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.07939232122444287 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^24": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 19 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.026109178947368427 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0007116177577395647 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.026097561384502206 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0007337748011409093 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 642865275.9089972 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10285844414.543955 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.08033807496988217 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^26": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 5 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.10435436000000002 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0008827267294601751 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.10434051666259767 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0009079744888650902 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 643171666.6403677 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 10290746666.245884 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.08037636423898623 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^28": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^30": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Const" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" } } }, { "index": 2, "name": "cub::DeviceRadixSort::SortKeys - Half Word", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0 ], "axes": { "Key": { "type": "type", "flags": "", "values": [ { "input_string": "U8", "description": "uint8_t", "is_active": true }, { "input_string": "U16", "description": "uint16_t", "is_active": true }, { "input_string": "U32", "description": "uint32_t", "is_active": true }, { "input_string": "U64", "description": "uint64_t", "is_active": true } ] }, "Input": { "type": "type", "flags": "", "values": [ { "input_string": "Rand", "description": "Random values uniformly distributed across `T`'s value range", "is_active": true } ] }, "Pattern": { "type": "type", "flags": "", "values": [ { "input_string": "Ascend", "description": "", "is_active": true } ] }, "Elements": { "type": "int64", "flags": "pow2", "values": [ { "input_string": "20", "description": "2^20 = 1048576", "value": 1048576 }, { "input_string": "22", "description": "2^22 = 4194304", "value": 4194304 }, { "input_string": "24", "description": "2^24 = 16777216", "value": 16777216 }, { "input_string": "26", "description": "2^26 = 67108864", "value": 67108864 }, { "input_string": "28", "description": "2^28 = 268435456", "value": 268435456 }, { "input_string": "30", "description": "2^30 = 1073741824", "value": 1073741824 } ] }, "Bits": { "type": "string", "flags": "", "values": [ { "input_string": "Half", "description": "", "value": "Half" } ] } }, "states": { "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1048576 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 538 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 8.960966542750939e-05 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.08397008166379398 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 8.340330078477751e-05 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.045910043356146846 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 12572356131.394053 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 25144712262.788105 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.19639396606151668 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 131 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00028253587786259543 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.04123287912799956 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0002716179535589143 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.037808745584459125 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 15441924751.451488 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 30883849502.902977 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.24121976929910474 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 33 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0009550272727272727 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.007063558495008569 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0009488116340203719 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.007360718544290043 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 17682346419.92151 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 35364692839.84302 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.27621760840917126 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 8 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.003773775 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.014705369350832631 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0037657760083675386 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.014874000832241253 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 17820726418.90659 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 35641452837.81318 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.2783792554815451 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.014729750000000002 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.01471895980834961 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 18237393096.74077 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 36474786193.48154 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.2848880451252932 } } }, "is_skipped": false }, "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U8" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 1 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.06453160000000001 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.06452188873291016 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 16641512594.97035 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 33283025189.9407 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.2599586446352529 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 2097152 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 450 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00025646377777777785 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.021618265944340534 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.00025038620548115814 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.022009562632867193 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 4187834541.3837366 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 16751338165.534946 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.13083712013820722 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 116 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0007816620689655175 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.020853227399617164 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0007727365504051081 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.016658278118279097 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 5427857654.463388 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 21711430617.853554 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.16957815716269023 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 29 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.002729172413793104 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.007205020379178172 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.002722046876775808 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.007340614282676166 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 6163455943.077721 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 24653823772.310883 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.19255985825661462 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 7 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0105532 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.005970051554930724 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.010544246673583986 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.005939659771636435 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 6364500573.390865 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 25458002293.56346 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1988409326852932 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0433476 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.04333521652221679 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 6194395171.935518 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 24777580687.742073 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1935264675061084 } } }, "is_skipped": false }, "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U16" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 4194304 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 403 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.00035955682382134016 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.04131883733262655 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.000348795553294956 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.031411058442745776 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3006276857.874047 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 24050214862.992374 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.18784534228155753 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 16777216 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 103 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0012605281553398058 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0430687589837951 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0012512904081529784 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.03898592591253357 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3351982859.191884 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 26815862873.535072 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.20944656705772832 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 67108864 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 26 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.004762250000000001 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.004982744499450167 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.004748374150349544 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0038811646473003495 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3533254850.771389 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 28266038806.171112 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.22077323486449568 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 268435456 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 7 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.01901851428571429 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.010872125017202998 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.019009632383074078 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.01087158253950949 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3530255748.646293 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 28242045989.170345 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.22058583783093558 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 1073741824 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 2 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.07661000000000001 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.07659774398803712 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": null } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 3504482534.6543326 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 28035860277.23466 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.21897541456225522 } } }, "is_skipped": false }, "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U32" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1048576 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 8388608 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 270 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0009441937037037035 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.08572346375827458 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0009308705164326564 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.019541093452539494 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1126446677.0506625 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 18023146832.8106 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1407706419708401 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 4194304 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 33554432 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 68 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.0036803838235294115 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.04345566028172002 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.0036717261111035055 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.04355542870677633 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1142324855.6901317 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 18277197691.042107 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.14275491823170852 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 16777216 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 134217728 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 18 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.01408933888888889 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0017506825580184378 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.014078581280178495 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.0016365268643609497 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1191683712.024376 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 19066939392.390015 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.1489232331947483 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 67108864 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": { "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "Size" }, "value": { "type": "int64", "value": 536870912 } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": 5 } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": 0.05700572 } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": 0.0126927638236131 } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": 0.05699455947875977 } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": 0.012703536689392532 } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": 1177460877.2089825 } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": 18839374035.34372 } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": 0.147145823195324 } } }, "is_skipped": false }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 268435456 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" }, "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Key": { "type": "string", "value": "U64" }, "Input": { "type": "string", "value": "Rand" }, "Pattern": { "type": "string", "value": "Ascend" }, "Elements": { "type": "int64", "value": 1073741824 }, "Bits": { "type": "string", "value": "Half" } }, "summaries": null, "is_skipped": true, "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" } } } ] }