From 1819b003c2cc9ff14152f6099eaab6eb94c8cb4d Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Thu, 8 Jul 2021 14:44:18 -0400 Subject: [PATCH] Update test json files with new number format and multi-device. --- scripts/test_cmp.json | 38222 ++++++++++++++++++---------------------- scripts/test_ref.json | 38222 ++++++++++++++++++---------------------- 2 files changed, 33974 insertions(+), 42470 deletions(-) diff --git a/scripts/test_cmp.json b/scripts/test_cmp.json index 592d52c..20c8f8a 100644 --- a/scripts/test_cmp.json +++ b/scripts/test_cmp.json @@ -2,21 +2,42 @@ "devices": [ { "id": 0, - "name": "NVIDIA GeForce GTX 1650", - "sm_version": 750, - "ptx_version": 750, - "sm_default_clock_rate": 1560000000, - "number_of_sms": 16, - "max_blocks_per_sm": 16, - "max_threads_per_sm": 1024, + "name": "NVIDIA Quadro GV100", + "sm_version": 700, + "ptx_version": 700, + "sm_default_clock_rate": 1627000000, + "number_of_sms": 80, + "max_blocks_per_sm": 32, + "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, - "global_memory_size": 4294967296, - "global_memory_bus_peak_clock_rate": 4001000000, - "global_memory_bus_width": 128, - "global_memory_bus_bandwidth": 128032000000, - "l2_cache_size": 1048576, + "global_memory_size": 34078982144, + "global_memory_bus_peak_clock_rate": 850000000, + "global_memory_bus_width": 4096, + "global_memory_bus_bandwidth": 870400000000, + "l2_cache_size": 6291456, + "shared_memory_per_sm": 98304, + "shared_memory_per_block": 49152, + "ecc_state": false + }, + { + "id": 1, + "name": "NVIDIA Quadro GP100", + "sm_version": 600, + "ptx_version": 600, + "sm_default_clock_rate": 1442500000, + "number_of_sms": 56, + "max_blocks_per_sm": 32, + "max_threads_per_sm": 2048, + "max_threads_per_block": 1024, + "registers_per_sm": 65536, + "registers_per_block": 65536, + "global_memory_size": 17069309952, + "global_memory_bus_peak_clock_rate": 715000000, + "global_memory_bus_width": 4096, + "global_memory_bus_bandwidth": 732160000000, + "l2_cache_size": 4194304, "shared_memory_per_sm": 65536, "shared_memory_per_block": 49152, "ecc_state": false @@ -25,25 +46,7256 @@ "benchmarks": [ { "index": 0, - "name": "cub::DeviceRadixSort::SortKeys - Overview", + "name": "simple", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ - 0 + 0, + 1 + ], + "axes": null, + "states": { + "Device=0": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": null, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "486" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010095795164609047" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006114730449640358" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010034803637751827" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005535128658782786" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001001473929135854" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + }, + "Device=1": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": null, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "488" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010075532745901644" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005196761038903798" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010027413077530309" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0003559489414701089" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010014738126565483" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 1, + "name": "single_float64_axis", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 ], "axes": { - "Key": { - "type": "type", + "Duration": { + "type": "float64", "flags": "", "values": [ { - "input_string": "bool", + "input_string": "0", "description": "", - "is_active": true + "value": 0.0 }, + { + "input_string": "0.0001", + "description": "", + "value": 0.0001 + }, + { + "input_string": "0.0002", + "description": "", + "value": 0.0002 + }, + { + "input_string": "0.0003", + "description": "", + "value": 0.00030000000000000003 + }, + { + "input_string": "0.0004", + "description": "", + "value": 0.0004 + }, + { + "input_string": "0.0005", + "description": "", + "value": 0.0005 + }, + { + "input_string": "0.0006", + "description": "", + "value": 0.0006000000000000001 + }, + { + "input_string": "0.0007", + "description": "", + "value": 0.0007000000000000001 + }, + { + "input_string": "0.0008", + "description": "", + "value": 0.0008000000000000001 + }, + { + "input_string": "0.0009", + "description": "", + "value": 0.0009000000000000002 + }, + { + "input_string": "0.001", + "description": "", + "value": 0.0010000000000000002 + } + ] + } + }, + "states": { + "Device=0 Duration=0": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "14050" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "9.162447829181515e-06" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.03536831341378678" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "3.7685477789450405e-06" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.1242757507930245" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "1.6396544558213103e-06" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "305626" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0001": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "3833" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00010862307644142961" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.003939155614687134" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00010305688671520844" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004761970040891668" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00010137620362095862" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "5088" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0002": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "2173" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002089772070869765" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0021045460135476644" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00020339080587790604" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0024397165593270475" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00020172840121363044" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2582" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0003": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.00030000000000000003" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1519" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00030826679394338436" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0014114649766999914" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003027125938382783" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0016421113166888573" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003010571695496376" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1742" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0004": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0004" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1166" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004085880488850769" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0010128960046765418" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004030490282469304" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0012417175292930155" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004014095938278854" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1304" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0005": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0005" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "945" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0005090367534391529" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0008945139151387666" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005034566609317042" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0009551336090877046" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005017609577982818" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1044" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0006": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0006000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "796" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006083192776381908" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006965661259016556" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.000602732541573109" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0008277718240662281" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006010903174724053" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "872" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0007": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0007000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "686" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007086454693877553" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005852496456482882" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007030655056151287" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0007134410597090336" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007014413925415692" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "748" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0008": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0008000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "603" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008089611791044773" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005396538572101352" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008033946325529826" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006279991294953942" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008017935563300363" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "654" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0009": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0009000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "539" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009083643543599264" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00046818252995143266" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009027890649266406" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005476172787827472" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009011217884181701" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "582" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.001": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0010000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "486" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010087072057613155" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00041872799651378016" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010031157275776806" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0004937944200398705" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001001475909284053" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "14964" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "8.23057591553059e-06" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.050523894758860086" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "3.353875434461126e-06" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.05725803858596207" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "1.355632943746511e-06" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "368832" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0001": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "3942" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.000107132078640284" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0038634259631888097" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0001024897333569082" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.003060119065396588" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0001013761587297066" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "5074" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0002": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "2208" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002075046254528986" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.001976178510512602" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00020285036215099666" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0015468676288172283" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00020172864733863198" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2595" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0003": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.00030000000000000003" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1537" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003067784710474956" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0013859894180016342" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003021601552535132" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0010334107287380953" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003010565582609204" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1737" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0004": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0004" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1176" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004071361462585035" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0010022100665278571" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00040249948315068836" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0007542402160340061" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004014089213396104" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1306" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0005": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0005" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "951" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0005075497234490012" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0007999383507906208" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005028805712670812" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005997416033360081" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005017610334757409" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1046" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0006": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0006000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "800" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006068307462500002" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.000718570216952097" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006021880812197924" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005176533625753945" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006010893901462271" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "873" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0007": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0007000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "690" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007071831971014495" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005757596123903934" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007025585162466849" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00045240029617132243" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007014411477481618" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "748" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0008": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0008000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "606" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008075822128712869" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005708387835625503" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008029147460319033" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0004053734463924786" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00080179443359375" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "655" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0009": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0009000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "541" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009068751312384467" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0004621968439524866" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009022267493875558" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0003405832639595575" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009011221046513186" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "583" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.001": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0010000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "488" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010072327028688517" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0004212022611220011" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010025862904845687" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0003195925477697865" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010014755598461355" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 2, + "name": "copy_sweep_grid_shape", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 + ], + "axes": { + "BlockSize": { + "type": "int64", + "flags": "pow2", + "values": [ + { + "input_string": "6", + "description": "2^6 = 64", + "value": 64 + }, + { + "input_string": "8", + "description": "2^8 = 256", + "value": 256 + }, + { + "input_string": "10", + "description": "2^10 = 1024", + "value": 1024 + } + ] + }, + "NumBlocks": { + "type": "int64", + "flags": "pow2", + "values": [ + { + "input_string": "6", + "description": "2^6 = 64", + "value": 64 + }, + { + "input_string": "8", + "description": "2^8 = 256", + "value": 256 + }, + { + "input_string": "10", + "description": "2^10 = 1024", + "value": 1024 + } + ] + } + }, + "states": { + "Device=0 BlockSize=2^6 NumBlocks=2^6": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "70" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.007152438914285718" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.05103516144522029" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.007146643659046716" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.05105065000806433" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "9390263066.362482" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "75122104530.89986" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.08630756494818458" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.006479399461012621" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "78" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^8 NumBlocks=2^6": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "229" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.002168227908296944" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0073699479168641575" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0021626523655054347" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007425464144896749" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "31030814323.371826" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "248246514586.9746" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.28520969047216754" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002159405241287294" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "243" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^10 NumBlocks=2^6": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "448" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010965164419642858" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.012922365203073934" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010910109984023236" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013003332317485732" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "61510712631.013084" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "492085701048.1047" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.565355814623282" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010869085366833847" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "486" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^6 NumBlocks=2^8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "229" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.002169319052401745" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.003922215380349919" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0021638464469576494" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.003914908768819146" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "31013690502.09386" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "248109524016.7509" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.2850523024089509" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0021620516050990224" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "243" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^8 NumBlocks=2^8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "456" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.001076449870614035" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011436755828819038" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010709658272956548" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.01147415502832018" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "62662003109.342606" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "501296024874.74084" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5759375285785165" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010696090290923384" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "487" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^10 NumBlocks=2^8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "500" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009796881099999996" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006630669601394768" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009742486392259615" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006702329573169841" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "68882686922.01393" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "551061495376.1115" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6331129312685104" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009725467921183118" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "542" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^6 NumBlocks=2^10": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "459" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010702333769063192" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009289316979889557" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010647220575212134" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009351871253339476" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "63029467198.450455" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "504235737587.60364" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5793149558681108" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010658674782853784" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "492" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^8 NumBlocks=2^10": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "500" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.000979172994" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007443039949868979" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009736743034124385" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007443203952714375" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "68923318367.14127" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "551386546937.1301" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6334863820509308" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009717381278159657" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "541" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^10 NumBlocks=2^10": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "474" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010353058628691984" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.02225997387052118" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010297924077712025" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.022347468768318675" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "65167371106.61446" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "521338968852.9157" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5989648079652065" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010282588096414032" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "522" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^6 NumBlocks=2^6": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "76" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0066473009473684225" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0010215778080601146" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.006642639580525849" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.001033363366599417" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "10102740512.482763" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "80821924099.8621" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.11038833601926096" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.006642121520223497" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "79" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^8 NumBlocks=2^6": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "216" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0023017428981481495" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0022391000845061416" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002297048738709203" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002236143378518806" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "29215254717.542892" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "233722037740.34314" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.3192226258472781" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002296809949372944" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "228" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^10 NumBlocks=2^6": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "418" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.001179723488038277" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0038851033159110274" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001175016037870252" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0039137311385035334" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "57113147256.81244" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "456905178054.4995" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6240509971242618" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011733407974243164" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "448" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^6 NumBlocks=2^8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "224" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.002222525227678572" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0013751517218031045" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002217807294002601" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0013709949704347455" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "30259105099.65222" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "242072840797.21777" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.33062833369375244" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002216307048556171" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "237" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^8 NumBlocks=2^8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "434" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011343795576036872" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0063161302839560935" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011296571429973376" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006343846096918854" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59406399911.69264" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "475251199293.54114" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6491083906434948" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011274932015425106" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "469" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^10 NumBlocks=2^8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "437" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011263629336384434" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002337926005235178" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011216670730294026" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002335621873443411" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59829574758.535194" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "478636598068.28156" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6537322416797989" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011199346014793882" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "470" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^6 NumBlocks=2^10": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "439" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011228420820045561" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0030427104293198376" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011181568284784197" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0030409709404223893" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "60017398535.51786" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "480139188284.1429" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6557845119702563" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011164653372257314" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "470" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^8 NumBlocks=2^10": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "440" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00112084285909091" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0025157975228794673" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011161849425597621" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0025072621927497768" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "60123427078.40005" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "480987416627.2004" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6569430406293711" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011137209100238348" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "472" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^10 NumBlocks=2^10": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "464" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010599195581896552" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0019836197107494535" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010552384144273295" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0019838471234599063" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "63595925889.809" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "508767407118.472" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.69488555386592" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00105384634014122" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "499" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 3, + "name": "copy_type_sweep", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 + ], + "axes": { + "T": { + "type": "type", + "flags": "", + "values": [ { "input_string": "U8", "description": "uint8_t", @@ -64,6 +7316,2415 @@ "description": "uint64_t", "is_active": true }, + { + "input_string": "F32", + "description": "float", + "is_active": true + }, + { + "input_string": "F64", + "description": "double", + "is_active": true + } + ] + } + }, + "states": { + "Device=0 T=U8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U8" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "217" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0022855767235023037" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00300372701685277" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00228007503588628" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0030435512411696388" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "117730974540.34332" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "235461949080.68665" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.27052154076365653" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0022790989087975544" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "230" + } + } + }, + "is_skipped": false + }, + "Device=0 T=U16": { + "device": 0, + "type_config_index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U16" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "342" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0014443213274853803" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005313412134692269" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0014388333057102406" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0053179597407149795" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "93282333309.4497" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "373129333237.7988" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.4286871935176917" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001437488301595052" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "360" + } + } + }, + "is_skipped": false + }, + "Device=0 T=U32": { + "device": 0, + "type_config_index": 2, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "456" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010777073771929832" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011572026564316875" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010722620346044238" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011623749514354471" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "62586253951.21598" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "500690031609.72784" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5752413046986763" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001070086296237245" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "490" + } + } + }, + "is_skipped": false + }, + "Device=0 T=U64": { + "device": 0, + "type_config_index": 3, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "514" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009532965797665363" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0051196185177249205" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009478394400748767" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005131328754237089" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "35400966219.92148" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "566415459518.7437" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6507530555132625" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009458808417792793" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "555" + } + } + }, + "is_skipped": false + }, + "Device=0 T=F32": { + "device": 0, + "type_config_index": 4, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "456" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010760199342105259" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.010177745756624636" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010705234403150128" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.010199116668820488" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "62687897782.2779" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "501503182258.2232" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5761755310871131" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010701700846354166" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "489" + } + } + }, + "is_skipped": false + }, + "Device=0 T=F64": { + "device": 0, + "type_config_index": 5, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "514" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009537802023346293" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00826653498170841" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009483462104769544" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008316637345502817" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "35382048907.14371" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "566112782514.2993" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6504053107930828" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009457701526988636" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "550" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U8" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "184" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0027052529021739146" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.003601477829562271" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0027005073000555477" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0036073295118906625" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "99401862751.66835" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "198803725503.3367" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.2715304380235696" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0027012594746802137" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "193" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U16": { + "device": 1, + "type_config_index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U16" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "325" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0015229335907692304" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00461287264596262" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001518287454751822" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004612569935175065" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "88400735697.27223" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "353602942789.0889" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.48295856477967786" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001517369088409953" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "346" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U32": { + "device": 1, + "type_config_index": 2, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "435" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011324326574712646" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006407601007533829" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011277568740406254" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006437993538809961" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59506499623.0584" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "476051996984.4672" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6502021374897116" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011264256719333023" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "469" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U64": { + "device": 1, + "type_config_index": 3, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "468" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010516201538461542" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002564258397948022" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010469369577546404" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0025731489764373736" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "32050097908.439487" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512801566535.0318" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.700395496250863" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010448107472453933" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "502" + } + } + }, + "is_skipped": false + }, + "Device=1 T=F32": { + "device": 1, + "type_config_index": 4, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "435" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.001132957098850574" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006385301490538342" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011282652709675928" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006406051844181131" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59479685962.90115" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "475837487703.2092" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6499091560631682" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011265583781452921" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "462" + } + } + }, + "is_skipped": false + }, + "Device=1 T=F64": { + "device": 1, + "type_config_index": 5, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "468" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010518258760683764" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002638851741610878" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010471444782028856" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0026422486584488855" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "32043746300.974895" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512699940815.5983" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.7002566936401856" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010447956953898514" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "505" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 4, + "name": "copy_type_conversion_sweep", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 + ], + "axes": { + "In": { + "type": "type", + "flags": "", + "values": [ { "input_string": "I8", "description": "int8_t", @@ -80,13 +9741,13 @@ "is_active": true }, { - "input_string": "I64", - "description": "int64_t", + "input_string": "F32", + "description": "float", "is_active": true }, { - "input_string": "F32", - "description": "float", + "input_string": "I64", + "description": "int64_t", "is_active": true }, { @@ -96,77 +9757,45 @@ } ] }, - "Input": { + "Out": { "type": "type", "flags": "", "values": [ { - "input_string": "Rand", - "description": "Random values uniformly distributed across `T`'s value range", + "input_string": "I8", + "description": "int8_t", "is_active": true - } - ] - }, - "Pattern": { - "type": "type", - "flags": "", - "values": [ + }, { - "input_string": "Ascend", - "description": "", + "input_string": "I16", + "description": "int16_t", "is_active": true - } - ] - }, - "Elements": { - "type": "int64", - "flags": "pow2", - "values": [ - { - "input_string": "16", - "description": "2^16 = 65536", - "value": 65536 }, { - "input_string": "18", - "description": "2^18 = 262144", - "value": 262144 + "input_string": "I32", + "description": "int32_t", + "is_active": true }, { - "input_string": "20", - "description": "2^20 = 1048576", - "value": 1048576 + "input_string": "F32", + "description": "float", + "is_active": true }, { - "input_string": "22", - "description": "2^22 = 4194304", - "value": 4194304 + "input_string": "I64", + "description": "int64_t", + "is_active": true }, { - "input_string": "24", - "description": "2^24 = 16777216", - "value": 16777216 - }, - { - "input_string": "26", - "description": "2^26 = 67108864", - "value": 67108864 - }, - { - "input_string": "28", - "description": "2^28 = 268435456", - "value": 268435456 - }, - { - "input_string": "30", - "description": "2^30 = 1073741824", - "value": 1073741824 + "input_string": "F64", + "description": "double", + "is_active": true } ] } }, "states": { - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^16": { + "Device=0 In=I8 Out=I8": { "device": 0, "type_config_index": 0, "min_samples": 10, @@ -175,4378 +9804,48 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "bool" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 65536 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 3042 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 3.927120315581862e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.4351576079801729 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 3.2775679267673803e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.4041140207967676 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1999531404.5142384 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 3999062809.0284767 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.031234869478165433 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 262144 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1691 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 5.06085156712005e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.3900269997798082 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 4.116893645171086e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.15850062777398677 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6367519362.747736 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 12735038725.495472 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.09946762313714909 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 537 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00010505661080074492 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.13838288384658645 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 9.64652218021716e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.08737598421104262 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 10869990037.96822 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21739980075.93644 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.16980114405723912 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 141 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0003081985815602837 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.025128010564140792 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0003021013356269674 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02018452044120483 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 13883765165.404953 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 27767530330.809906 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2168796108067507 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 34 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.001104020588235294 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00868011645589697 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0010932272953145647 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.003434233699996367 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15346503029.978348 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 30693006059.956696 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.23972917754902442 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0042372500000000006 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0030855867536742037 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0042257159948349 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0019932191078502345 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15881063488.892126 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31762126977.784252 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2480795971146608 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.019142950000000002 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.019130672454833984 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 14031679055.388933 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28063358110.777866 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2191901876935287 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0667937 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06678323364257813 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 16078014876.407965 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 32156029752.81593 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2511561933955256 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 65536 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 3062 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 5.317815153494455e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.3713839348328813 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 4.7155929496967694e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.40429857529493773 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1389772202.54378 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 2779544405.08756 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.021709763223940578 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 262144 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1569 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 7.891446781389421e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.11657350278726154 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 7.26659224292061e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.11037642266139251 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3607523186.0627747 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 7215046372.125549 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.05635346141687664 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 505 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0001637732673267328 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04440230894077945 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00015781791602620992 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04411047078155534 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6644213954.934341 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 13288427909.868683 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1037898955719561 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 129 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0005240697674418604 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01553926208912671 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0005178778284741924 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.015023407302414188 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8099022142.650033 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16198044285.300066 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12651559208088656 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 31 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0019522129032258073 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.23301756357696535 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.001942181168063994 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.23453440661836827 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8638337285.8691 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17276674571.7382 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13494028502044958 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0072903875 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007239378588174434 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.007273227989673614 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007137742915442734 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 9226833545.611362 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18453667091.222725 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14413324083996754 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.029422500000000004 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.029412703514099123 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 9126514190.418577 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18253028380.837154 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14256614268961787 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.12713770000000002 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.12712566375732423 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8446302597.481127 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16892605194.962254 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.131940492962402 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 131072 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2506 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 7.933663208300092e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.27652333688390907 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 7.329776243909458e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.2908651937565574 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 894106420.4307182 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 3576425681.7228727 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.027933842177915464 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 524288 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1269 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00014559330181245089 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.19873696733232882 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00013759964715081717 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.09333440846518179 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1905121164.3927765 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 7620484657.571106 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.05952015634818722 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 406 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00038108940886699564 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03821569814913922 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00037408323064813464 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.029659979896029746 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2803055347.290609 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 11212221389.162436 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08757358620627996 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 110 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0011471336363636359 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01637197025341408 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0011375141815705729 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014295623169967478 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3687254249.6207814 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14749016998.483126 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11519789582669275 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 28 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004043317857142858 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01100346103239151 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.004035516560077667 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01078543812390853 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4157389952.4964676 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16629559809.98587 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12988596452438353 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.015595228571428572 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005255757704044775 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.015586642401559011 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00526989300271275 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4305536899.54981 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17222147598.19924 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13451439951105382 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0652593 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0652172622680664 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4116018469.107669 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16464073876.430676 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12859342880241406 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 + "value": "I8" } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Not a conversion: InputType == OutputType." }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^16": { + "Device=0 In=I8 Out=I16": { "device": 0, - "type_config_index": 3, + "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U32" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I16" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -4554,11 +9853,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 262144 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" } }, "Number of Samples (Cold)": { @@ -4576,7 +9889,7 @@ }, "value": { "type": "int64", - "value": 2245 + "value": "775" } }, "Average CPU Time (Cold)": { @@ -4594,7 +9907,7 @@ }, "value": { "type": "float64", - "value": 8.775260579064617e-05 + "value": "0.000624855941935483" } }, "CPU Relative Standard Deviation (Cold)": { @@ -4612,7 +9925,7 @@ }, "value": { "type": "float64", - "value": 0.2588213270041119 + "value": "0.0026998034506879763" } }, "Average GPU Time (Cold)": { @@ -4630,7 +9943,7 @@ }, "value": { "type": "float64", - "value": 8.086750657362002e-05 + "value": "0.0006194506773641069" } }, "GPU Relative Standard Deviation (Cold)": { @@ -4648,7 +9961,7 @@ }, "value": { "type": "float64", - "value": 0.2692444298865416 + "value": "0.0027411303205337333" } }, "Element Throughput": { @@ -4666,7 +9979,7 @@ }, "value": { "type": "float64", - "value": 810412027.9798346 + "value": "108336089461.65392" } }, "Average Global Memory Throughput": { @@ -4684,7 +9997,7 @@ }, "value": { "type": "float64", - "value": 6483296223.838676 + "value": "325008268384.96173" } }, "Percent Peak Global Memory Throughput": { @@ -4702,39 +10015,77 @@ }, "value": { "type": "float64", - "value": 0.05063809222568324 + "value": "0.37340104364081084" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006171294842507731" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "849" } } }, "is_skipped": false }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^18": { + "Device=0 In=I8 Out=I32": { "device": 0, - "type_config_index": 3, + "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U32" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 + "value": "I32" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -4742,11 +10093,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 1048576 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" } }, "Number of Samples (Cold)": { @@ -4764,7 +10129,7 @@ }, "value": { "type": "int64", - "value": 1055 + "value": "660" } }, "Average CPU Time (Cold)": { @@ -4782,7 +10147,7 @@ }, "value": { "type": "float64", - "value": 0.00021251706161137438 + "value": "0.0007373176257575753" } }, "CPU Relative Standard Deviation (Cold)": { @@ -4800,7 +10165,7 @@ }, "value": { "type": "float64", - "value": 0.06362320460336848 + "value": "0.0039374442120074255" } }, "Average GPU Time (Cold)": { @@ -4818,7 +10183,7 @@ }, "value": { "type": "float64", - "value": 0.00020548479910145425 + "value": "0.0007318062056194664" } }, "GPU Relative Standard Deviation (Cold)": { @@ -4836,7 +10201,7 @@ }, "value": { "type": "float64", - "value": 0.06357499446836187 + "value": "0.003968882228293087" } }, "Element Throughput": { @@ -4854,7 +10219,7 @@ }, "value": { "type": "float64", - "value": 1275734269.1347759 + "value": "91703054011.67381" } }, "Average Global Memory Throughput": { @@ -4872,7 +10237,7 @@ }, "value": { "type": "float64", - "value": 10205874153.078207 + "value": "458515270058.369" } }, "Percent Peak Global Memory Throughput": { @@ -4890,13 +10255,49 @@ }, "value": { "type": "float64", - "value": 0.07971346345505972 + "value": "0.5267868451957365" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007297926682692308" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "715" } } }, "is_skipped": false }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20": { + "Device=0 In=I8 Out=F32": { "device": 0, "type_config_index": 3, "min_samples": 10, @@ -4905,24 +10306,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U32" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 + "value": "F32" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -4930,11 +10333,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 4194304 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" } }, "Number of Samples (Cold)": { @@ -4952,7 +10369,7 @@ }, "value": { "type": "int64", - "value": 328 + "value": "655" } }, "Average CPU Time (Cold)": { @@ -4970,7 +10387,7 @@ }, "value": { "type": "float64", - "value": 0.0006486628048780487 + "value": "0.0007424307770992365" } }, "CPU Relative Standard Deviation (Cold)": { @@ -4988,7 +10405,7 @@ }, "value": { "type": "float64", - "value": 0.062073612578813026 + "value": "0.004224076490124651" } }, "Average GPU Time (Cold)": { @@ -5006,7 +10423,7 @@ }, "value": { "type": "float64", - "value": 0.0006387051705543588 + "value": "0.0007369820818646267" } }, "GPU Relative Standard Deviation (Cold)": { @@ -5024,7 +10441,7 @@ }, "value": { "type": "float64", - "value": 0.019862156160162552 + "value": "0.004224007291086274" } }, "Element Throughput": { @@ -5042,7 +10459,7 @@ }, "value": { "type": "float64", - "value": 1641721483.3095798 + "value": "91059017106.91382" } }, "Average Global Memory Throughput": { @@ -5060,7 +10477,7 @@ }, "value": { "type": "float64", - "value": 13133771866.476639 + "value": "455295085534.5691" } }, "Percent Peak Global Memory Throughput": { @@ -5078,795 +10495,49 @@ }, "value": { "type": "float64", - "value": 0.10258194722004373 + "value": "0.5230871846674737" } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 16777216 + "type": "float64", + "value": "0.0007352958009195734" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 87 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0022856022988505752 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01813131000490793 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.002277155692549959 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.018170531150182676 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1841904799.800148 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14735238398.401184 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11509027741815471 + "value": "706" } } }, "is_skipped": false }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 22 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.008837786363636363 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007500416221662679 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.008825995575297962 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007696619972960275 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1900886518.3385963 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15207092146.70877 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11877571346779532 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 6 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.035352516666666674 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006577426327632489 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.03533673604329427 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006429928650474585 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1899124580.090781 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15192996640.726248 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11866561985071113 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.1430682 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.14305706024169923 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1876422285.9498873 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15011378287.599098 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11724708110159256 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^16": { + "Device=0 In=I8 Out=I64": { "device": 0, "type_config_index": 4, "min_samples": 10, @@ -5875,24 +10546,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U64" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -5900,11 +10573,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 524288 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" } }, "Number of Samples (Cold)": { @@ -5922,7 +10609,7 @@ }, "value": { "type": "int64", - "value": 1413 + "value": "407" } }, "Average CPU Time (Cold)": { @@ -5940,7 +10627,7 @@ }, "value": { "type": "float64", - "value": 0.00018726730360934168 + "value": "0.0012081452088452092" } }, "CPU Relative Standard Deviation (Cold)": { @@ -5958,7 +10645,7 @@ }, "value": { "type": "float64", - "value": 0.19158497176050596 + "value": "0.009488803674295956" } }, "Average GPU Time (Cold)": { @@ -5976,7 +10663,7 @@ }, "value": { "type": "float64", - "value": 0.00017967336391272188 + "value": "0.0012026181030624918" } }, "GPU Relative Standard Deviation (Cold)": { @@ -5994,7 +10681,7 @@ }, "value": { "type": "float64", - "value": 0.1929620201640373 + "value": "0.009557481496348873" } }, "Element Throughput": { @@ -6012,7 +10699,7 @@ }, "value": { "type": "float64", - "value": 364750782.04599524 + "value": "55802306508.69623" } }, "Average Global Memory Throughput": { @@ -6030,7 +10717,7 @@ }, "value": { "type": "float64", - "value": 5836012512.735924 + "value": "502220758578.26605" } }, "Percent Peak Global Memory Throughput": { @@ -6048,14 +10735,4308 @@ }, "value": { "type": "float64", - "value": 0.045582452142713725 + "value": "0.5769999524106917" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0012018776918068911" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "429" } } }, "is_skipped": false }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^18": { + "Device=0 In=I8 Out=F64": { "device": 0, + "type_config_index": 5, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "415" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011839576867469879" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007849889234045669" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011785369482385114" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007894267956792296" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "56942520215.682335" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512482681941.14105" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5887898459801713" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011769149367873732" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "444" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=I8": { + "device": 0, + "type_config_index": 6, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I16 Out=I16": { + "device": 0, + "type_config_index": 7, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=I16 Out=I32": { + "device": 0, + "type_config_index": 8, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1105" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00043164991312217215" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.010807224020750296" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004261522234295282" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.010967121645138193" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78738136645.08269" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "472428819870.49615" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.542772081652684" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004235287455769328" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1232" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=F32": { + "device": 0, + "type_config_index": 9, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1103" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004325297388939259" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008132056614414322" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004270972975631891" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008246119473642231" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78563906143.74146" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "471383436862.44867" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5415710441893942" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004253676273859287" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1241" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=I64": { + "device": 0, + "type_config_index": 10, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "733" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006613082878581173" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007945982231516395" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006558368572759433" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008081739278450547" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "51162772612.94874" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "511627726129.48737" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5878075897627383" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006543398455948913" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "794" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=F64": { + "device": 0, + "type_config_index": 11, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "734" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006602106062670296" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007402870806519008" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006547767629577938" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007440276584065623" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "51245605980.92404" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512456059809.2404" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.588759259891131" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006532440560455698" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "814" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I32 Out=I8": { + "device": 0, + "type_config_index": 12, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I32 Out=I16": { + "device": 0, + "type_config_index": 13, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I32 Out=I32": { + "device": 0, + "type_config_index": 14, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=I32 Out=F32": { + "device": 0, + "type_config_index": 15, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1735" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002670209631123916" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013093461911533781" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002615653075986357" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013401071612529727" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "64141594900.43743" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "513132759203.49945" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5895367178349029" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00025968648747412565" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2023" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I32 Out=I64": { + "device": 0, + "type_config_index": 16, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1233" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00038442103811841063" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00844365555913354" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.000379033355382238" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008658982321334757" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44263165132.474785" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531157981589.6974" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6102458428190457" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037769805444141474" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1381" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I32 Out=F64": { + "device": 0, + "type_config_index": 17, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1233" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003844277526358471" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009146640310310188" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037900421011283686" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009309598787035932" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44266568951.84647" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531198827422.15765" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6102927704758245" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003778626589998872" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1385" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F32 Out=I8": { + "device": 0, + "type_config_index": 18, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F32 Out=I16": { + "device": 0, + "type_config_index": 19, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F32 Out=I32": { + "device": 0, + "type_config_index": 20, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1726" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00026862367844727737" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013398984730737511" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002631768242198105" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013647890011152018" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "63748835216.53615" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "509990681732.2892" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5859267942696337" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002611921188044063" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1917" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F32 Out=F32": { + "device": 0, + "type_config_index": 21, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=F32 Out=I64": { + "device": 0, + "type_config_index": 22, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1235" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003840306607287451" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009229816202298303" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037859300990336473" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009349966037483663" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44314648081.54371" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531775776978.5245" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6109556261242239" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037728447759194497" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1385" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F32 Out=F64": { + "device": 0, + "type_config_index": 23, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1233" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003844510843471213" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009332888492996015" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037903595060134466" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009455810939030309" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44262862067.25975" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531154344807.117" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6102416645302355" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037756963876577526" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1352" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I64 Out=I8": { + "device": 0, + "type_config_index": 24, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=I16": { + "device": 0, + "type_config_index": 25, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=I32": { + "device": 0, + "type_config_index": 26, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=F32": { + "device": 0, + "type_config_index": 27, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=I64": { + "device": 0, + "type_config_index": 28, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=I64 Out=F64": { + "device": 0, + "type_config_index": 29, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1863" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00024704845947396656" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008865365856503242" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00024161205912269328" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009044628498958832" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "34719326636.507706" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "555509226184.1233" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.638222916112274" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00023959052166815616" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2168" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F64 Out=I8": { + "device": 0, + "type_config_index": 30, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=I16": { + "device": 0, + "type_config_index": 31, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=I32": { + "device": 0, + "type_config_index": 32, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=F32": { + "device": 0, + "type_config_index": 33, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=I64": { + "device": 0, + "type_config_index": 34, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1863" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002471723081052067" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008513791485233733" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00024173997316990882" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008792904583053216" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "34700955286.79488" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "555215284588.7181" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6378852074778472" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002398694754203643" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2141" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F64 Out=F64": { + "device": 0, + "type_config_index": 35, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I8 Out=I8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I8 Out=I16": { + "device": 1, + "type_config_index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "704" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006917278508522718" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.031528564248938934" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006868111818859521" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.03156956554686085" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "97710791218.80649" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "293132373656.4195" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.4003665505578282" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.000659236081199501" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "789" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I8 Out=I32": { + "device": 1, + "type_config_index": 2, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "568" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008635000933098584" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007603813185561577" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008587616908718169" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0076405577469716105" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78146084895.6489" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "390730424478.2445" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5336680841322177" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008576233512476871" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "608" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I8 Out=F32": { + "device": 1, + "type_config_index": 3, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "568" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008623903292253519" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007683250202065139" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008576831001211219" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0077139887360741476" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78244358540.49461" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "391221792702.473" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5343392055049074" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008563666250191483" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "612" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I8 Out=I64": { + "device": 1, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, @@ -6063,24 +15044,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U64" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 + "value": "I64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -6088,11 +15071,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 2097152 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" } }, "Number of Samples (Cold)": { @@ -6110,7 +15107,7 @@ }, "value": { "type": "int64", - "value": 593 + "value": "339" } }, "Average CPU Time (Cold)": { @@ -6128,7 +15125,7 @@ }, "value": { "type": "float64", - "value": 0.0005387549747048903 + "value": "0.0014575299587020652" } }, "CPU Relative Standard Deviation (Cold)": { @@ -6146,7 +15143,7 @@ }, "value": { "type": "float64", - "value": 0.04111938777242051 + "value": "0.005325090452654586" } }, "Average GPU Time (Cold)": { @@ -6164,7 +15161,7 @@ }, "value": { "type": "float64", - "value": 0.0005312656186282335 + "value": "0.0014528400236878067" } }, "GPU Relative Standard Deviation (Cold)": { @@ -6182,7 +15179,7 @@ }, "value": { "type": "float64", - "value": 0.04063673179425505 + "value": "0.005339082380112657" } }, "Element Throughput": { @@ -6200,7 +15197,7 @@ }, "value": { "type": "float64", - "value": 493433022.59399897 + "value": "46191502784.76956" } }, "Average Global Memory Throughput": { @@ -6218,7 +15215,7 @@ }, "value": { "type": "float64", - "value": 7894928361.5039835 + "value": "415723525062.9261" } }, "Percent Peak Global Memory Throughput": { @@ -6236,826 +15233,50 @@ }, "value": { "type": "float64", - "value": 0.0616637118962758 + "value": "0.56780420271925" } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 8388608 + "type": "float64", + "value": "0.00145044431581602" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 181 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0018093104972375699 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04001254538809503 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0017961900550357542 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01423183226589721 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 583777867.5258992 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9340445880.414387 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.0729539949420019 + "value": "364" } } }, "is_skipped": false }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 47 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00701123404255319 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.022882268388176443 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.006997501982019303 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023591403635686216 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 599400187.492284 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9590402999.876545 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07490629686231992 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 12 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.027476083333333335 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009319641724100018 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.02746358140309652 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009143918373427004 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 610889590.6091974 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9774233449.747158 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07634211329782521 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 3 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.11105753333333335 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.11104602813720703 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 604333762.5464745 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9669340200.743591 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07552283960840721 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I8 Out=F64": { + "device": 1, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, @@ -7063,24 +15284,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "F64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -7088,11 +15311,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 65536 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" } }, "Number of Samples (Cold)": { @@ -7110,7 +15347,7 @@ }, "value": { "type": "int64", - "value": 3087 + "value": "339" } }, "Average CPU Time (Cold)": { @@ -7128,7 +15365,7 @@ }, "value": { "type": "float64", - "value": 5.5682150955620366e-05 + "value": "0.0014595411091445434" } }, "CPU Relative Standard Deviation (Cold)": { @@ -7146,7 +15383,7 @@ }, "value": { "type": "float64", - "value": 0.15974697309068234 + "value": "0.00536065113916752" } }, "Average GPU Time (Cold)": { @@ -7164,7 +15401,7 @@ }, "value": { "type": "float64", - "value": 4.976085047405057e-05 + "value": "0.0014548183609250722" } }, "GPU Relative Standard Deviation (Cold)": { @@ -7182,7 +15419,7 @@ }, "value": { "type": "float64", - "value": 0.1566862558828566 + "value": "0.005369023538480178" } }, "Element Throughput": { @@ -7200,7 +15437,7 @@ }, "value": { "type": "float64", - "value": 1317019290.7811313 + "value": "46128689190.67507" } }, "Average Global Memory Throughput": { @@ -7218,7 +15455,7 @@ }, "value": { "type": "float64", - "value": 2634038581.5622625 + "value": "415158202716.0756" } }, "Percent Peak Global Memory Throughput": { @@ -7236,1330 +15473,50 @@ }, "value": { "type": "float64", - "value": 0.020573283097680757 + "value": "0.567032073202682" } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 262144 + "type": "float64", + "value": "0.00145255855984158" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 1508 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.478799734748009e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.18692260194964838 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 7.815668975504499e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.07663177112866752 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3354082687.247878 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 6708165374.495756 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.052394443377403746 + "value": "360" } } }, "is_skipped": false }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 493 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.000172661663286004 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03967760002514821 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0001667072121922918 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03756877297429747 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6289925829.9065 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 12579851659.813 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.09825552721048644 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 126 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.000547765079365079 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02267930122532969 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0005414133327347893 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.020125269100866486 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 7746953660.734054 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15493907321.468107 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12101589697472591 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 31 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0019378612903225812 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.011107088633221586 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0019301894287909237 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01047020541368632 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8692004913.999191 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17384009827.998383 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13577863212320657 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0075735125 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008208621847337904 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.007558403968811036 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006689815940438041 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8878708293.036163 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17757416586.072327 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13869514329286683 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.03057025 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.03056054401397705 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8783726358.968918 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17567452717.937836 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13721142150351345 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.1411317 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.14112380981445313 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 7608509332.420483 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15217018664.840965 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11885324500781809 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I8": { + "device": 1, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, @@ -8567,1345 +15524,21 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "I16" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 131072 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2444 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.023633387888718e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.15957149507307994 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 7.409614451997878e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.11997847029431298 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 884472470.5254984 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 3537889882.1019936 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.027632856489799375 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 524288 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1258 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00014721875993640703 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.05030707755378517 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00014142066840858286 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04965983055115567 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1853647016.026198 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 7414588064.104792 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.05791199125300543 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 395 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00038132582278481046 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.022492628088386854 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.000374839979561069 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.020997160180716862 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2797396374.922077 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 11189585499.688309 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08739678751943505 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 107 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0011724710280373834 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.019342776294093793 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0011632810418850906 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.016095597179303357 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3605580980.846342 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14422323923.385368 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11264624409042559 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 27 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00414218888888889 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010143570260672414 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.004132891301755552 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01004138981574669 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4059437999.9477477 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16237751999.79099 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12682573106560072 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.015818842857142856 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004297912986520931 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.015806600979396273 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004417201153941865 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4245622704.5571437 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16982490818.228575 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1326425488801907 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.06584135 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06581772994995116 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4078467248.9331145 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16313868995.732458 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12742024646754294 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 + "value": "I8" } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I16": { + "device": 1, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, @@ -9913,1345 +15546,21 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "I32" + "value": "I16" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 262144 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2237 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.876477425122915e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.35106187182872767 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 8.160484933232342e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.2520628318430057 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 803089528.823398 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 6424716230.587184 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.05018055041385891 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1033 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0002160535333978701 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.1188318605057773 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00020900739510142425 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.12132837233117388 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1254233133.1041677 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10033865064.833342 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07836997832442938 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 324 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0006508543209876547 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.017018531103233366 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0006438862221476476 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01711700991339697 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1628511317.578021 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 13028090540.624168 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.10175651821907154 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 84 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.002322446428571428 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0213207920280134 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.002313739804994491 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02154045421700801 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1812781191.2757347 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14502249530.205877 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1132705068280264 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 21 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.008975690476190476 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008053073918408916 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.008964469319298155 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007960830786781505 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1871523611.987053 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14972188895.896423 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11694099050156542 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.035879860000000006 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007059995549678311 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.03586985015869141 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0070565543246424054 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1870898922.1617713 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14967191377.29417 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11690195714582426 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.1458295 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.14581488037109375 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1840933211.458537 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14727465691.668297 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11502956832407756 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 + "value": "I16" } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Not a conversion: InputType == OutputType." }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I32": { + "device": 1, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, @@ -11259,24 +15568,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "I64" + "value": "I16" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I32" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -11284,11 +15595,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 524288 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" } }, "Number of Samples (Cold)": { @@ -11306,7 +15631,7 @@ }, "value": { "type": "int64", - "value": 1442 + "value": "1042" } }, "Average CPU Time (Cold)": { @@ -11324,7 +15649,7 @@ }, "value": { "type": "float64", - "value": 0.00018916165048543735 + "value": "0.00046152892994241876" } }, "CPU Relative Standard Deviation (Cold)": { @@ -11342,7 +15667,7 @@ }, "value": { "type": "float64", - "value": 0.08474054695372502 + "value": "0.007446740614945881" } }, "Average GPU Time (Cold)": { @@ -11360,7 +15685,7 @@ }, "value": { "type": "float64", - "value": 0.00018243015832403314 + "value": "0.00045683037259413987" } }, "GPU Relative Standard Deviation (Cold)": { @@ -11378,7 +15703,7 @@ }, "value": { "type": "float64", - "value": 0.08522171353418057 + "value": "0.007515900606668647" } }, "Element Throughput": { @@ -11396,7 +15721,7 @@ }, "value": { "type": "float64", - "value": 359238848.4561566 + "value": "73450527839.16064" } }, "Average Global Memory Throughput": { @@ -11414,7 +15739,7 @@ }, "value": { "type": "float64", - "value": 5747821575.298506 + "value": "440703167034.96387" } }, "Percent Peak Global Memory Throughput": { @@ -11432,1014 +15757,50 @@ }, "value": { "type": "float64", - "value": 0.04489363264885736 + "value": "0.6019219392413733" } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 2097152 + "type": "float64", + "value": "0.0004549347768605374" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 588 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0005411481292517009 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02452569809177557 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0005339167867185301 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023483747081537745 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 490982876.9594332 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 7855726031.350931 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.061357520239869186 + "value": "1156" } } }, "is_skipped": false }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 182 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.001794480219780219 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01043393384097355 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.001785578369439303 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009622529095864946 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 587247257.217429 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9395956115.478865 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.0733875602621131 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 47 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.007055212765957448 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.022872858397093812 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.007045798778533936 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02290392030641084 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 595291482.4616572 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9524663719.386515 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07439283709843254 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 12 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.027424558333333335 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0056934303158599825 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.027411389350891106 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005639627987530259 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 612052741.4804167 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9792843863.686666 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07648747081734775 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 3 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.11087960000000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.11086710357666014 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 605309075.7764491 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9684945212.423185 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07564472329123333 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=F32": { + "device": 1, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, @@ -12447,24 +15808,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { "type": "string", "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -12472,11 +15835,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 262144 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" } }, "Number of Samples (Cold)": { @@ -12494,7 +15871,7 @@ }, "value": { "type": "int64", - "value": 2324 + "value": "1046" } }, "Average CPU Time (Cold)": { @@ -12512,7 +15889,7 @@ }, "value": { "type": "float64", - "value": 8.308123924268518e-05 + "value": "0.0004599197934990448" } }, "CPU Relative Standard Deviation (Cold)": { @@ -12530,7 +15907,7 @@ }, "value": { "type": "float64", - "value": 0.20123216459376678 + "value": "0.007608321516935087" } }, "Average GPU Time (Cold)": { @@ -12548,7 +15925,7 @@ }, "value": { "type": "float64", - "value": 7.645772117159609e-05 + "value": "0.00045521636728916755" } }, "GPU Relative Standard Deviation (Cold)": { @@ -12566,7 +15943,7 @@ }, "value": { "type": "float64", - "value": 0.2163426509558451 + "value": "0.007662230748478094" } }, "Element Throughput": { @@ -12584,7 +15961,7 @@ }, "value": { "type": "float64", - "value": 857153456.7832047 + "value": "73710952441.8422" } }, "Average Global Memory Throughput": { @@ -12602,7 +15979,7 @@ }, "value": { "type": "float64", - "value": 6857227654.265637 + "value": "442265714651.0532" } }, "Percent Peak Global Memory Throughput": { @@ -12620,1172 +15997,50 @@ }, "value": { "type": "float64", - "value": 0.05355870137360689 + "value": "0.604056100648838" } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 1048576 + "type": "float64", + "value": "0.0004532274742649026" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 1081 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00018608593894542115 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.061054319816048364 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00017904156912376654 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.058580426390462105 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1464151600.563705 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 11713212804.50964 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.09148660338438547 + "value": "1168" } } }, "is_skipped": false }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 361 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0004975271468144047 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014379246069577772 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0004902403537091129 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.012272733824773483 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2138901851.0340319 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17111214808.272255 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1336479537012017 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 96 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.001751761458333334 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005879019262093316 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.001744055998822054 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005818847781913598 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2404913605.315918 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19239308842.527344 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15026953294900763 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 24 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.006750754166666667 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.002444731344335298 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0067418733040491745 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0023841178656418723 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2488509534.8682375 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19908076278.9459 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15549297268609333 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 6 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.026754200000000006 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0006428245182255163 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.026743643124898273 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0006056040012260031 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2509338899.2138395 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20074711193.710716 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1567944825802199 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10674365000000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10673254394531251 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2515029119.3053603 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20120232954.442883 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1571500324484729 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I64": { + "device": 1, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, @@ -13793,776 +16048,2516 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "648" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007539361157407405" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005745552244274178" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007492513590388824" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005767470540109363" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44783945461.29704" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "447839454612.97046" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6116688355181524" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007466272232380319" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "705" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I16 Out=F64": { + "device": 1, + "type_config_index": 11, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "650" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007517909569230775" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0052243182117119895" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007470858345581929" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005225121011834867" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44913757493.26477" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "449137574932.64764" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6134418363918374" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007439345558090966" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "707" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I32 Out=I8": { + "device": 1, + "type_config_index": 12, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I32 Out=I16": { + "device": 1, + "type_config_index": 13, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I32 Out=I32": { + "device": 1, + "type_config_index": 14, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I32 Out=F32": { + "device": 1, + "type_config_index": 15, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1687" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002777349045643155" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005691592315009916" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.000273079350458212" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005675718906016491" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "61437146279.45599" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "491497170235.64795" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6712974899416083" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002715061958589702" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1930" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I32 Out=I64": { + "device": 1, + "type_config_index": 16, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1133" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004230005507502205" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004752057993974487" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041831812217818477" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004789005696384007" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40106357125.149025" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "481276285501.78827" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6573375840004757" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041601362464715726" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1251" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I32 Out=F64": { + "device": 1, + "type_config_index": 17, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1132" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004233390768551238" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004579872590098746" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041865752666346193" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004575612011474384" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40073843013.66299" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "480886116163.9559" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6568046822606478" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004163219633556548" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1260" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F32 Out=I8": { + "device": 1, + "type_config_index": 18, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=F32 Out=I16": { + "device": 1, + "type_config_index": 19, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=F32 Out=I32": { + "device": 1, + "type_config_index": 20, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1665" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00028157421321321324" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.01259188984622349" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002768973456309726" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0128083650869227" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "60590021048.303505" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "484720168386.42804" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6620413139019177" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002751490314863719" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1917" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F32 Out=F32": { + "device": 1, + "type_config_index": 21, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=F32 Out=I64": { + "device": 1, + "type_config_index": 22, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1133" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004230867334510152" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004737837167796919" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004184002545904713" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0047584325554732645" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40098484204.84705" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "481181810458.1647" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6572085479378342" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004160796998517786" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1265" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F32 Out=F64": { + "device": 1, + "type_config_index": 23, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1132" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004234168127208481" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004661976745113187" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041872537112383403" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0046895661377312735" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40067350003.108116" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "480808200037.2974" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6566982627257668" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041628507170249205" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1259" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I64 Out=I8": { + "device": 1, + "type_config_index": 24, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=I16": { + "device": 1, + "type_config_index": 25, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=I32": { + "device": 1, + "type_config_index": 26, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=F32": { + "device": 1, + "type_config_index": 27, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=I64": { + "device": 1, + "type_config_index": 28, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I64 Out=F64": { + "device": 1, + "type_config_index": 29, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1753" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002666010844266969" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004049936615976281" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00026192253768546044" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00396300201275967" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "32027056831.87056" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512432909309.92896" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6998919762209476" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00026010225147830515" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1995" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F64 Out=I8": { + "device": 1, + "type_config_index": 30, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I8" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 524288 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1446 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00017735048409405266 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.11011368701446163 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00017031459973024972 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.1112846952116118 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 384793788.1062353 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 6156700609.699765 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.048087201712851205 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=I16": { + "device": 1, + "type_config_index": 31, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 + "value": "I16" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 614 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0005115635179153098 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014363302118375774 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.000504564481760081 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.013743392821332084 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 519545091.8097892 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 8312721468.956627 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.06492690475003614 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=I32": { + "device": 1, + "type_config_index": 32, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 + "value": "I32" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 187 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0017560828877005348 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007271133357282276 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0017482294819571758 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007054498367435898 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 599793111.1572946 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9596689778.516714 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07495540004465066 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=F32": { + "device": 1, + "type_config_index": 33, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 + "value": "F32" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 50 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0067117 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0020046252309587377 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.006701972465515136 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.001795830890697078 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 625831279.0125157 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10013300464.20025 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07820935753718017 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=I64": { + "device": 1, + "type_config_index": 34, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 + "value": "I64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -14570,11 +18565,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 134217728 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" } }, "Number of Samples (Cold)": { @@ -14592,7 +18601,7 @@ }, "value": { "type": "int64", - "value": 13 + "value": "1753" } }, "Average CPU Time (Cold)": { @@ -14610,7 +18619,7 @@ }, "value": { "type": "float64", - "value": 0.02650104615384615 + "value": "0.00026658457387335985" } }, "CPU Relative Standard Deviation (Cold)": { @@ -14628,7 +18637,7 @@ }, "value": { "type": "float64", - "value": 0.0007265940345306042 + "value": "0.004159876144452023" } }, "Average GPU Time (Cold)": { @@ -14646,7 +18655,7 @@ }, "value": { "type": "float64", - "value": 0.026490544979388894 + "value": "0.0002619018185261111" } }, "GPU Relative Standard Deviation (Cold)": { @@ -14664,7 +18673,7 @@ }, "value": { "type": "float64", - "value": 0.00072958672729313 + "value": "0.004108154173032601" } }, "Element Throughput": { @@ -14682,7 +18691,7 @@ }, "value": { "type": "float64", - "value": 633328457.8725579 + "value": "32029590505.3583" } }, "Average Global Memory Throughput": { @@ -14700,7 +18709,7 @@ }, "value": { "type": "float64", - "value": 10133255325.960926 + "value": "512473448085.7328" } }, "Percent Peak Global Memory Throughput": { @@ -14718,8326 +18727,69 @@ }, "value": { "type": "float64", - "value": 0.07914627066640313 + "value": "0.6999473449597531" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002600505164606654" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2010" } } }, "is_skipped": false }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=F64": { + "device": 1, + "type_config_index": 35, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 3 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10620103333333335 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10618826548258464 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 631980037.4835782 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10111680599.737251 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07897776024538593 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 10, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { + "Out": { "type": "string", "value": "F64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 10, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - } - } - }, - { - "index": 1, - "name": "cub::DeviceRadixSort::SortKeys - Constant Values", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "devices": [ - 0 - ], - "axes": { - "Key": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "U8", - "description": "uint8_t", - "is_active": true - }, - { - "input_string": "U16", - "description": "uint16_t", - "is_active": true - }, - { - "input_string": "U32", - "description": "uint32_t", - "is_active": true - }, - { - "input_string": "U64", - "description": "uint64_t", - "is_active": true - } - ] - }, - "Input": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Const", - "description": "All values = 42", - "is_active": true - } - ] - }, - "Pattern": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Ascend", - "description": "", - "is_active": true - } - ] - }, - "Elements": { - "type": "int64", - "flags": "pow2", - "values": [ - { - "input_string": "20", - "description": "2^20 = 1048576", - "value": 1048576 - }, - { - "input_string": "22", - "description": "2^22 = 4194304", - "value": 4194304 - }, - { - "input_string": "24", - "description": "2^24 = 16777216", - "value": 16777216 - }, - { - "input_string": "26", - "description": "2^26 = 67108864", - "value": 67108864 - }, - { - "input_string": "28", - "description": "2^28 = 268435456", - "value": 268435456 - }, - { - "input_string": "30", - "description": "2^30 = 1073741824", - "value": 1073741824 - } - ] - } - }, - "states": { - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2566 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 9.6390140296181e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.045598791758050095 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 9.036228858044716e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.033692751648895305 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 11604132835.419285 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 23208265670.83857 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1812692582388666 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1160 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0003043432758620684 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014476575112824767 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00029814910513573545 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.011313650678305812 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 14067806771.013115 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28135613542.02623 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21975454216153953 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 363 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.001116191460055097 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.011463805762636945 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0011063706461063093 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008802868537347153 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15164191185.878504 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 30328382371.757008 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.23688126696261097 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 100 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004274347 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0032816652112393967 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.004266441283226011 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0032308789304643024 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15729470897.40716 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31458941794.81432 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24571155488326604 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 26 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.017081826923076922 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010556997792740205 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.017073184013366702 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010548301451256467 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15722635906.099308 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31445271812.198616 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24560478483659254 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.06782757142857143 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.002248092939308053 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06781701987130302 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.002251004801379183 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15832925510.994875 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31665851021.98975 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24732762920199441 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1374 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00025053580786026195 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.06364830482823801 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0002446616114899394 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.06504028362673961 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4285821521.4654465 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17143286085.861786 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13389844793381175 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 484 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.000845314876033058 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0042877181599594 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0008389603308409695 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004147125806742644 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4999406820.338754 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19997627281.355015 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1561924150318281 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 138 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.003167936956521741 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009915660724473297 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0031528834553732395 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.001304193155702654 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5321229356.387328 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21284917425.549313 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1662468556731857 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 37 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.012415567567567568 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0020929194181039177 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.012407164470569507 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0020887980997006564 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5408880019.216801 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21635520076.867203 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1689852542869533 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 10 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.04940072 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.000576076970718073 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.04938988418579101 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0005818752831350183 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5435029063.648347 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21740116254.593388 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.16980220768708906 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 804 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00047247549751243796 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.012916467373333794 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0004640141693291389 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005379896492658131 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2259793060.8800316 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18078344487.040253 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14120176586353608 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 251 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.001718760956175299 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008648709803676304 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0017113251937813012 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008654677513055743 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2450909982.0662203 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19607279856.529762 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15314358798214323 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 68 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.006638683823529413 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.002767845306863416 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0066234131490483005 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0015437824831612992 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2533016682.2540236 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20264133458.03219 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15827397414733962 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 18 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.02638116111111112 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0035695573142047353 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.02635982047186958 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0034495125905173608 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2545877126.576662 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20367017012.613297 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15907755102328555 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10501874 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0005592643361183458 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10500200805664064 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00055006156023586 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2556479261.3794527 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20451834091.03562 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15974001883150793 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 254 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0017053511811023635 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005201838970683627 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0016952332624300263 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0035478470341980604 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 618543785.8250388 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9896700573.20062 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07729864856598835 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 71 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.006611254929577467 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.003663027154345941 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0066021115477655976 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0036695333152667172 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 635297354.4379919 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10164757671.00787 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07939232122444287 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 19 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.026109178947368427 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0007116177577395647 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.026097561384502206 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0007337748011409093 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 642865275.9089972 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10285844414.543955 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08033807496988217 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10435436000000002 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0008827267294601751 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10434051666259767 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0009079744888650902 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 643171666.6403677 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10290746666.245884 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08037636423898623 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - } - } - }, - { - "index": 2, - "name": "cub::DeviceRadixSort::SortKeys - Half Word", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "devices": [ - 0 - ], - "axes": { - "Key": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "U8", - "description": "uint8_t", - "is_active": true - }, - { - "input_string": "U16", - "description": "uint16_t", - "is_active": true - }, - { - "input_string": "U32", - "description": "uint32_t", - "is_active": true - }, - { - "input_string": "U64", - "description": "uint64_t", - "is_active": true - } - ] - }, - "Input": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Rand", - "description": "Random values uniformly distributed across `T`'s value range", - "is_active": true - } - ] - }, - "Pattern": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Ascend", - "description": "", - "is_active": true - } - ] - }, - "Elements": { - "type": "int64", - "flags": "pow2", - "values": [ - { - "input_string": "20", - "description": "2^20 = 1048576", - "value": 1048576 - }, - { - "input_string": "22", - "description": "2^22 = 4194304", - "value": 4194304 - }, - { - "input_string": "24", - "description": "2^24 = 16777216", - "value": 16777216 - }, - { - "input_string": "26", - "description": "2^26 = 67108864", - "value": 67108864 - }, - { - "input_string": "28", - "description": "2^28 = 268435456", - "value": 268435456 - }, - { - "input_string": "30", - "description": "2^30 = 1073741824", - "value": 1073741824 - } - ] - }, - "Bits": { - "type": "string", - "flags": "", - "values": [ - { - "input_string": "Half", - "description": "", - "value": "Half" - } - ] - } - }, - "states": { - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 538 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.960966542750939e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.08397008166379398 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 8.340330078477751e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.045910043356146846 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 12572356131.394053 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 25144712262.788105 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.19639396606151668 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 131 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00028253587786259543 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04123287912799956 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0002716179535589143 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.037808745584459125 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15441924751.451488 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 30883849502.902977 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24121976929910474 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 33 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0009550272727272727 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007063558495008569 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0009488116340203719 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007360718544290043 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 17682346419.92151 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 35364692839.84302 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.27621760840917126 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.003773775 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014705369350832631 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0037657760083675386 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014874000832241253 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 17820726418.90659 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 35641452837.81318 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2783792554815451 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.014729750000000002 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.01471895980834961 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 18237393096.74077 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 36474786193.48154 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2848880451252932 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.06453160000000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06452188873291016 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 16641512594.97035 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 33283025189.9407 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2599586446352529 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 450 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00025646377777777785 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.021618265944340534 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00025038620548115814 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.022009562632867193 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4187834541.3837366 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16751338165.534946 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13083712013820722 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 116 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0007816620689655175 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.020853227399617164 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0007727365504051081 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.016658278118279097 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5427857654.463388 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21711430617.853554 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.16957815716269023 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 29 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.002729172413793104 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007205020379178172 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.002722046876775808 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007340614282676166 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6163455943.077721 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 24653823772.310883 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.19255985825661462 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0105532 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005970051554930724 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.010544246673583986 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005939659771636435 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6364500573.390865 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 25458002293.56346 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1988409326852932 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0433476 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.04333521652221679 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6194395171.935518 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 24777580687.742073 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1935264675061084 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 403 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00035955682382134016 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04131883733262655 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.000348795553294956 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.031411058442745776 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3006276857.874047 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 24050214862.992374 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.18784534228155753 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 103 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0012605281553398058 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0430687589837951 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0012512904081529784 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03898592591253357 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3351982859.191884 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 26815862873.535072 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.20944656705772832 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 26 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004762250000000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004982744499450167 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.004748374150349544 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0038811646473003495 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3533254850.771389 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28266038806.171112 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.22077323486449568 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.01901851428571429 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010872125017202998 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.019009632383074078 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01087158253950949 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3530255748.646293 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28242045989.170345 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.22058583783093558 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.07661000000000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.07659774398803712 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3504482534.6543326 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28035860277.23466 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21897541456225522 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 270 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0009441937037037035 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.08572346375827458 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0009308705164326564 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.019541093452539494 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1126446677.0506625 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18023146832.8106 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1407706419708401 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 68 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0036803838235294115 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04345566028172002 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0036717261111035055 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04355542870677633 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1142324855.6901317 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18277197691.042107 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14275491823170852 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 18 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.01408933888888889 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0017506825580184378 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.014078581280178495 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0016365268643609497 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1191683712.024376 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19066939392.390015 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1489232331947483 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.05700572 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0126927638236131 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.05699455947875977 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.012703536689392532 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1177460877.2089825 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18839374035.34372 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.147145823195324 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Not a conversion: InputType == OutputType." } } } diff --git a/scripts/test_ref.json b/scripts/test_ref.json index 89b8d8f..557a9c7 100644 --- a/scripts/test_ref.json +++ b/scripts/test_ref.json @@ -2,21 +2,42 @@ "devices": [ { "id": 0, - "name": "NVIDIA GeForce GTX 1650", - "sm_version": 750, - "ptx_version": 750, - "sm_default_clock_rate": 1560000000, - "number_of_sms": 16, - "max_blocks_per_sm": 16, - "max_threads_per_sm": 1024, + "name": "NVIDIA Quadro GV100", + "sm_version": 700, + "ptx_version": 700, + "sm_default_clock_rate": 1627000000, + "number_of_sms": 80, + "max_blocks_per_sm": 32, + "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, - "global_memory_size": 4294967296, - "global_memory_bus_peak_clock_rate": 4001000000, - "global_memory_bus_width": 128, - "global_memory_bus_bandwidth": 128032000000, - "l2_cache_size": 1048576, + "global_memory_size": 34078982144, + "global_memory_bus_peak_clock_rate": 850000000, + "global_memory_bus_width": 4096, + "global_memory_bus_bandwidth": 870400000000, + "l2_cache_size": 6291456, + "shared_memory_per_sm": 98304, + "shared_memory_per_block": 49152, + "ecc_state": false + }, + { + "id": 1, + "name": "NVIDIA Quadro GP100", + "sm_version": 600, + "ptx_version": 600, + "sm_default_clock_rate": 1442500000, + "number_of_sms": 56, + "max_blocks_per_sm": 32, + "max_threads_per_sm": 2048, + "max_threads_per_block": 1024, + "registers_per_sm": 65536, + "registers_per_block": 65536, + "global_memory_size": 17069309952, + "global_memory_bus_peak_clock_rate": 715000000, + "global_memory_bus_width": 4096, + "global_memory_bus_bandwidth": 732160000000, + "l2_cache_size": 4194304, "shared_memory_per_sm": 65536, "shared_memory_per_block": 49152, "ecc_state": false @@ -25,25 +46,7256 @@ "benchmarks": [ { "index": 0, - "name": "cub::DeviceRadixSort::SortKeys - Overview", + "name": "simple", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ - 0 + 0, + 1 + ], + "axes": null, + "states": { + "Device=0": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": null, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "486" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010094132736625523" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005987183296179167" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010034002306039446" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005072701393681687" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001001473929135854" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + }, + "Device=1": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": null, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "488" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010074898913934418" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005542305355933818" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010027081287298028" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00035037919649082367" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010014748609703007" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 1, + "name": "single_float64_axis", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 ], "axes": { - "Key": { - "type": "type", + "Duration": { + "type": "float64", "flags": "", "values": [ { - "input_string": "bool", + "input_string": "0", "description": "", - "is_active": true + "value": 0.0 }, + { + "input_string": "0.0001", + "description": "", + "value": 0.0001 + }, + { + "input_string": "0.0002", + "description": "", + "value": 0.0002 + }, + { + "input_string": "0.0003", + "description": "", + "value": 0.00030000000000000003 + }, + { + "input_string": "0.0004", + "description": "", + "value": 0.0004 + }, + { + "input_string": "0.0005", + "description": "", + "value": 0.0005 + }, + { + "input_string": "0.0006", + "description": "", + "value": 0.0006000000000000001 + }, + { + "input_string": "0.0007", + "description": "", + "value": 0.0007000000000000001 + }, + { + "input_string": "0.0008", + "description": "", + "value": 0.0008000000000000001 + }, + { + "input_string": "0.0009", + "description": "", + "value": 0.0009000000000000002 + }, + { + "input_string": "0.001", + "description": "", + "value": 0.0010000000000000002 + } + ] + } + }, + "states": { + "Device=0 Duration=0": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "14061" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "9.102689638005845e-06" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.033946388108068055" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "3.7547417902904438e-06" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.12549022159970946" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "1.630773172830879e-06" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "306655" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0001": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "3835" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00010860168552803123" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004007949999262656" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00010303751935470811" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004789691009751296" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00010137620362095862" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "5088" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0002": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "2174" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00020898149126034966" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002070700973146156" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00020338884861017417" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002418204625044133" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002017283984223771" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2583" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0003": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.00030000000000000003" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1520" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00030825112500000015" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0014009307905580174" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00030272901975793895" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0016163896900565434" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003010571695496376" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1742" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0004": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0004" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1166" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004085718481989706" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0010690404823574895" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00040307120334734023" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0012226190019077351" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004014095938278854" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1304" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0005": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0005" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "945" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0005089798201058188" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0008530028319072816" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005034217145707861" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0009752402596440034" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005017619516657686" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1044" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0006": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0006000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "796" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006082355979899511" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0007134353357638104" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006027260286424639" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0008279817736951732" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006010891975612815" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "872" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0007": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0007000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "685" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007086865854014601" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006295331091145095" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007030571342384726" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0007151653876403053" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007014426981064088" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "748" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0008": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0008000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "602" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008090872425249167" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005562631850494214" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008034305715085621" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006219681072125149" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008017951428707951" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "654" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.0009": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0009000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "538" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009084568382899636" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005031047519089767" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009027937730448745" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005501738587938111" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009011217884181701" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "582" + } + } + }, + "is_skipped": false + }, + "Device=0 Duration=0.001": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0010000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "487" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010086481827515403" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0004266615566594544" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010031193825253714" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0004975122529595318" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001001475909284053" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "15089" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "8.108349592418312e-06" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.05461449121054022" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "3.271210544150035e-06" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.059765735669007766" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "1.3421442998656208e-06" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "372558" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0001": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "3944" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00010710262145030443" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004128650771669589" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00010247565930403145" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0030818570098060543" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00010137613820964433" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "5117" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0002": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "2193" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00020765215686274505" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0021648763590408093" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00020284258628946086" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0015447061481155045" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002017285137353667" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2584" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0003": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.00030000000000000003" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1537" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003068213201040992" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0013512096196898148" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00030219575751114794" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0010543163243715088" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003010567871656286" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1736" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0004": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0004" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1176" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00040721289880952437" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0010016437258221326" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004025331704186726" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0007456691947680211" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004014084236753499" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1304" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0005": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0005" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "951" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0005075412103049417" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.000846863074833117" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005028813449366248" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006292766848433991" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0005017613753177333" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1045" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0006": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0006000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "800" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006068351487499997" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006561812659454387" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006021752006560568" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.000511717182892197" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006010895299747637" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "873" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0007": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0007000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "690" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007071279246376804" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0005386426703062701" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007025530446266783" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00042821786377290075" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007014415557371741" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "748" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0008": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0008000000000000001" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "605" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008076996363636364" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0006014433173443102" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008029008409208492" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00036509958633429017" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008017936496559632" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "654" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.0009": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0009000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "540" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009070510574074071" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00046472458647248545" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009022252441556363" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00033895812399517745" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009011227322607926" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "582" + } + } + }, + "is_skipped": false + }, + "Device=1 Duration=0.001": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "Duration": { + "type": "float64", + "value": "0.0010000000000000002" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "488" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010073550901639342" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0004238073408932392" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010025966528986322" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0003136332645329908" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001001473929135854" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "524" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 2, + "name": "copy_sweep_grid_shape", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 + ], + "axes": { + "BlockSize": { + "type": "int64", + "flags": "pow2", + "values": [ + { + "input_string": "6", + "description": "2^6 = 64", + "value": 64 + }, + { + "input_string": "8", + "description": "2^8 = 256", + "value": 256 + }, + { + "input_string": "10", + "description": "2^10 = 1024", + "value": 1024 + } + ] + }, + "NumBlocks": { + "type": "int64", + "flags": "pow2", + "values": [ + { + "input_string": "6", + "description": "2^6 = 64", + "value": 64 + }, + { + "input_string": "8", + "description": "2^8 = 256", + "value": 256 + }, + { + "input_string": "10", + "description": "2^10 = 1024", + "value": 1024 + } + ] + } + }, + "states": { + "Device=0 BlockSize=2^6 NumBlocks=2^6": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "71" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.007065658352112677" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.05358128799632556" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.007059958081849862" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.053589324741995806" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "9505561254.326319" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "76044490034.61055" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.08736729094049925" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.006475561071325232" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "81" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^8 NumBlocks=2^6": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "229" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0021687765283842793" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006699637202043051" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0021633964730141996" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00669331351204079" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "31020141170.19388" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "248161129361.55103" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.28511159163781136" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002160161503025743" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "244" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^10 NumBlocks=2^6": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "448" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010963011227678571" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013516109455086892" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001090899714667882" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013565950821979889" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "61516987398.26961" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "492135899186.15686" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5654134871164486" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010871857387360318" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "481" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^6 NumBlocks=2^8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "229" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.002169116519650655" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.003880325099879575" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0021636720515755057" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0038733421374846436" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "31016190254.495274" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "248129522035.9622" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.2850752780744051" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0021606314702289093" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "243" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^8 NumBlocks=2^8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "456" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010761263311403508" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011961974879208899" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001070721754902288" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.012050980053815875" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "62676286993.08928" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "501410295944.71423" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5760688142747177" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010696770163143381" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "493" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^10 NumBlocks=2^8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "500" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.000980373466000001" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005650663121151804" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009750024316310896" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005680157515531913" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "68829432443.29456" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "550635459546.3564" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6326234599567514" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009731230225510264" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "542" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^6 NumBlocks=2^10": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "459" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010701848496732027" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008617668166839768" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010647455503218568" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008515610201608317" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "63028076501.20161" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "504224612009.61285" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5793021737242795" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010672177234327936" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "498" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^8 NumBlocks=2^10": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "500" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.000979696614" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0074291976714003565" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009743501433134098" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0074775515242700395" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "68875510986.00674" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "551004087888.054" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6330469759743267" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009717721991970888" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "541" + } + } + }, + "is_skipped": false + }, + "Device=0 BlockSize=2^10 NumBlocks=2^10": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "475" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010337088463157895" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.021637984186463816" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010282407758110449" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.02173209936637211" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "65265709723.54853" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "522125677788.38824" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.599868655547321" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010291563019039125" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "508" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^6 NumBlocks=2^6": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "76" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.006647754513157893" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0011430629751785044" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.006643085875009235" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0011389249175732911" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "10102061792.16473" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "80816494337.31784" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.11038091993186987" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.006639652300484573" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "79" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^8 NumBlocks=2^6": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "216" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.002300918597222223" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0022260554559899452" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0022963019234162794" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0022413389898784455" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "29224756255.11826" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "233798050040.94608" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.3193264450952607" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0022975726211280152" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "228" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^10 NumBlocks=2^6": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "418" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011795720191387577" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0035334409960244696" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001174919423874485" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0035420884521558988" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "57117843688.972115" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "456942749511.7769" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6241023130350974" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011729015622820172" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "448" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^6 NumBlocks=2^8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "224" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0022223120000000006" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0014441799301084402" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00221759328778301" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.001434325968668793" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "30262025218.83109" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "242096201750.6487" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.3306602405903747" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002216961359573623" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "236" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^8 NumBlocks=2^8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "435" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011336455977011492" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006534400600481561" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001129045183631195" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0065638034102788135" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59438599068.433075" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "475508792547.4646" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6494602170938929" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011272204485062364" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "466" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^10 NumBlocks=2^8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "256" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "437" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011265385652173912" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002220966435104119" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011218978122933775" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0022003475082832675" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59817269687.70571" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "478538157501.6457" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6535977894198614" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001119863004765959" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "468" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^6 NumBlocks=2^10": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "64" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "439" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011232369088838266" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00285184985884414" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011185731920403065" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0028360480110887457" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59995058416.86738" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "479960467334.939" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6555404110234635" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011161975045489451" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "468" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^8 NumBlocks=2^10": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "256" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "440" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011206702840909095" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002536479032620614" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011160453837026254" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0025536971451898373" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "60130945371.914566" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "481047562975.3165" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6570251898155001" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011138856279089096" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "470" + } + } + }, + "is_skipped": false + }, + "Device=1 BlockSize=2^10 NumBlocks=2^10": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "BlockSize": { + "type": "int64", + "value": "1024" + }, + "NumBlocks": { + "type": "int64", + "value": "1024" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "464" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010597870474137931" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0020209648798997564" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010551077248207455" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002011195776784625" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "63603803120.10441" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "508830424960.83527" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6949716250011408" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010536742918941392" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "498" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 3, + "name": "copy_type_sweep", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 + ], + "axes": { + "T": { + "type": "type", + "flags": "", + "values": [ { "input_string": "U8", "description": "uint8_t", @@ -64,6 +7316,2415 @@ "description": "uint64_t", "is_active": true }, + { + "input_string": "F32", + "description": "float", + "is_active": true + }, + { + "input_string": "F64", + "description": "double", + "is_active": true + } + ] + } + }, + "states": { + "Device=0 T=U8": { + "device": 0, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U8" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "217" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.002284935774193548" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.003019023225421965" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0022794654072704396" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0030185067855524154" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "117762460945.3669" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "235524921890.7338" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.2705938900399056" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0022792820785984846" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "231" + } + } + }, + "is_skipped": false + }, + "Device=0 T=U16": { + "device": 0, + "type_config_index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U16" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "341" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0014459254017595295" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005620271181121053" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0014404413371491634" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005659383776137258" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "93178197916.5051" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "372712791666.0204" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.4282086301309977" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0014370339589576198" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "365" + } + } + }, + "is_skipped": false + }, + "Device=0 T=U32": { + "device": 0, + "type_config_index": 2, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "456" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010763392214912279" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009580925422442722" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010708663173412028" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009602261983780735" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "62667825958.53892" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "501342607668.31134" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5759910474130415" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010690977880559816" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "489" + } + } + }, + "is_skipped": false + }, + "Device=0 T=U64": { + "device": 0, + "type_config_index": 3, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "514" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009534325642023344" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007974682202520992" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009479809484593146" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008022855237026269" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "35395681795.64538" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "566330908730.326" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6506559153611283" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009457213474094653" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "554" + } + } + }, + "is_skipped": false + }, + "Device=0 T=F32": { + "device": 0, + "type_config_index": 4, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "456" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010769479144736836" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011261863999383217" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001071445541946512" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011287071608158339" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "62633947664.836296" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "501071581318.69037" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5756796660370983" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001069358981385523" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "490" + } + } + }, + "is_skipped": false + }, + "Device=0 T=F64": { + "device": 0, + "type_config_index": 5, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "514" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0009534943599221791" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006006780711077088" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009480226613900089" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00602313677626831" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "35394124388.125755" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "566305990210.0121" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6506272865464293" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0009457029259723165" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "552" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U8" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "184" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00270240325" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0033226300614619185" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.002697714079981265" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0033217171224860604" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "99504783695.18842" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "199009567390.37683" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.2718115813351956" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0026982716095753207" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "195" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U16": { + "device": 1, + "type_config_index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U16" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "325" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0015216281538461547" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0046556036312148845" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0015169812690294725" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004682337277211795" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "88476852509.76712" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "353907410039.0685" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.4833744127500389" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0015158526066057275" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "347" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U32": { + "device": 1, + "type_config_index": 2, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "435" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011331533540229887" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006418753103730108" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011284679349811587" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0064621372230947265" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59469003876.588196" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "475752031012.70557" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6497924374627206" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011265910963430138" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "467" + } + } + }, + "is_skipped": false + }, + "Device=1 T=U64": { + "device": 1, + "type_config_index": 3, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "U64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "468" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010515641474358975" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002762541639974713" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001046885606570122" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002764528097772722" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "32051670009.99595" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512826720159.9352" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.7004298516170443" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.001044835600653889" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "503" + } + } + }, + "is_skipped": false + }, + "Device=1 T=F32": { + "device": 1, + "type_config_index": 4, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "435" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011328659609195397" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006308260028809877" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011281658846756504" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.006329740046854081" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "59484925853.163795" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "475879406825.31036" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6499664101088701" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011261699270694815" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "470" + } + } + }, + "is_skipped": false + }, + "Device=1 T=F64": { + "device": 1, + "type_config_index": 5, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "T": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "468" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0010518281880341881" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.002638709647720786" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010471613009770718" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0026399350413532966" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "32043231514.27718" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512691704228.4349" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.7002454439308824" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0010447449703140563" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "502" + } + } + }, + "is_skipped": false + } + } + }, + { + "index": 4, + "name": "copy_type_conversion_sweep", + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "devices": [ + 0, + 1 + ], + "axes": { + "In": { + "type": "type", + "flags": "", + "values": [ { "input_string": "I8", "description": "int8_t", @@ -80,13 +9741,13 @@ "is_active": true }, { - "input_string": "I64", - "description": "int64_t", + "input_string": "F32", + "description": "float", "is_active": true }, { - "input_string": "F32", - "description": "float", + "input_string": "I64", + "description": "int64_t", "is_active": true }, { @@ -96,77 +9757,45 @@ } ] }, - "Input": { + "Out": { "type": "type", "flags": "", "values": [ { - "input_string": "Rand", - "description": "Random values uniformly distributed across `T`'s value range", + "input_string": "I8", + "description": "int8_t", "is_active": true - } - ] - }, - "Pattern": { - "type": "type", - "flags": "", - "values": [ + }, { - "input_string": "Ascend", - "description": "", + "input_string": "I16", + "description": "int16_t", "is_active": true - } - ] - }, - "Elements": { - "type": "int64", - "flags": "pow2", - "values": [ - { - "input_string": "16", - "description": "2^16 = 65536", - "value": 65536 }, { - "input_string": "18", - "description": "2^18 = 262144", - "value": 262144 + "input_string": "I32", + "description": "int32_t", + "is_active": true }, { - "input_string": "20", - "description": "2^20 = 1048576", - "value": 1048576 + "input_string": "F32", + "description": "float", + "is_active": true }, { - "input_string": "22", - "description": "2^22 = 4194304", - "value": 4194304 + "input_string": "I64", + "description": "int64_t", + "is_active": true }, { - "input_string": "24", - "description": "2^24 = 16777216", - "value": 16777216 - }, - { - "input_string": "26", - "description": "2^26 = 67108864", - "value": 67108864 - }, - { - "input_string": "28", - "description": "2^28 = 268435456", - "value": 268435456 - }, - { - "input_string": "30", - "description": "2^30 = 1073741824", - "value": 1073741824 + "input_string": "F64", + "description": "double", + "is_active": true } ] } }, "states": { - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^16": { + "Device=0 In=I8 Out=I8": { "device": 0, "type_config_index": 0, "min_samples": 10, @@ -175,4378 +9804,48 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "bool" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 65536 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2609 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 4.044829436565725e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.7344469005707992 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 3.3346968211430716e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.7223623748539709 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1965276110.993967 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 3930552221.987934 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.030699764293207435 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 262144 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1611 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 4.803054003724396e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.12230911675278 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 4.140059072073415e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.10760267299529298 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6331890329.011988 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 12663780658.023975 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.09891105862615576 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 508 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00010131082677165365 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.022658862636519568 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 9.499697684948347e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02014810246126259 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 11037993363.319344 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 22075986726.638687 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.17242553991688553 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 134 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0003098910447761196 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02924359345479745 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00030219128372064273 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010038496680263341 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 13879632623.280346 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 27759265246.56069 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21681505597476172 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 32 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0011546593749999997 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.1716310430725688 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0011058050058782103 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006801724834036127 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15171947957.204117 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 30343895914.408234 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.23700243622225875 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004268425 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.003888230871894963 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0042546199560165405 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0018586331551975715 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15773174735.642382 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31546349471.284763 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24639425668024215 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.01962395 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.019611552238464357 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 13687619049.017168 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 27375238098.034336 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21381559374245765 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=bool Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "bool" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0675028 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06749183654785157 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15909210341.886597 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31818420683.773193 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24851928177153518 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 65536 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2970 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 5.3952222222222165e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.32079964560195756 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 4.756137399435661e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.19639288507532943 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1377924868.3559093 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 2755849736.7118187 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.0215246948943375 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 262144 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1528 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.000445026177996e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.09211021423127838 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 7.354155020466494e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.06982489161891461 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3564569950.8707323 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 7129139901.741465 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.05568248486113991 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 474 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00017160316455696196 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.062041477145776486 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0001641129443917092 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04734404630253115 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6389355842.018351 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 12778711684.036701 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.09980873284832464 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 124 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0005410080645161291 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0214250036018518 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0005334214202819335 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.015317548261178274 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 7863021319.584713 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15726042639.169426 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12282900086829407 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 30 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.001945613333333334 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007356225627552308 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0019374335924784336 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007393135530598775 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8659505061.300186 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17319010122.600372 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.135270948845604 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00753255 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.003915793151916625 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.007524112045764923 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0038120757657901278 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8919173929.337402 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17838347858.674805 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13932726083068925 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.030915200000000004 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.030904863357543944 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8685864515.704916 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17371729031.409832 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13568271237979437 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.1319179 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.13190963745117187 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8139980101.131428 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16279960202.262856 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12715540023012104 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 131072 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2295 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.879450980392147e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.11816100044470679 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 8.305174021094968e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.12439270947006983 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 789098456.3783966 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 3156393825.5135865 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.024653163470957154 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 524288 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1176 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00015293358843537406 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.044663112370144566 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00014689224612500005 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03577765071007969 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1784600664.1965623 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 7138402656.786249 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.05575483204813054 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 396 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00038702020202020234 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.029965184414531703 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00038085559544840265 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.029252982130513375 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2753211486.2733016 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 11012845945.093206 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08601635485732634 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 106 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0011748773584905655 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.015819656338631416 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.001168463095179144 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.015910226957725492 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3589590477.7009206 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14358361910.803682 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11214666576171334 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 27 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004157185185185186 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006148449005215739 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.004149304955093949 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005989462876723687 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4043379838.689183 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16173519354.756733 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12632403894929964 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.015923899999999998 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0030102828263264473 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.015911899294172017 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0030381049232100386 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4217526943.787262 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16870107775.149048 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13176477579940207 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0675835 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06757376098632813 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3972480620.9071484 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15889922483.628593 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12410899215530956 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 + "value": "I8" } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Not a conversion: InputType == OutputType." }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^16": { + "Device=0 In=I8 Out=I16": { "device": 0, - "type_config_index": 3, + "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U32" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I16" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -4554,11 +9853,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 262144 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" } }, "Number of Samples (Cold)": { @@ -4576,7 +9889,7 @@ }, "value": { "type": "int64", - "value": 2075 + "value": "775" } }, "Average CPU Time (Cold)": { @@ -4594,7 +9907,7 @@ }, "value": { "type": "float64", - "value": 9.593363855421674e-05 + "value": "0.0006248230980645156" } }, "CPU Relative Standard Deviation (Cold)": { @@ -4612,7 +9925,7 @@ }, "value": { "type": "float64", - "value": 0.14775468420150914 + "value": "0.0027640779893251216" } }, "Average GPU Time (Cold)": { @@ -4630,7 +9943,7 @@ }, "value": { "type": "float64", - "value": 8.907498377992449e-05 + "value": "0.0006193935315070645" } }, "GPU Relative Standard Deviation (Cold)": { @@ -4648,7 +9961,7 @@ }, "value": { "type": "float64", - "value": 0.15871029125259384 + "value": "0.0028186397219177456" } }, "Element Throughput": { @@ -4666,7 +9979,7 @@ }, "value": { "type": "float64", - "value": 735739679.3011861 + "value": "108346084655.93024" } }, "Average Global Memory Throughput": { @@ -4684,7 +9997,7 @@ }, "value": { "type": "float64", - "value": 5885917434.409489 + "value": "325038253967.7907" } }, "Percent Peak Global Memory Throughput": { @@ -4702,39 +10015,77 @@ }, "value": { "type": "float64", - "value": 0.04597223689709985 + "value": "0.37343549398873016" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006171660299862132" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "850" } } }, "is_skipped": false }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^18": { + "Device=0 In=I8 Out=I32": { "device": 0, - "type_config_index": 3, + "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U32" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 + "value": "I32" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -4742,11 +10093,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 1048576 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" } }, "Number of Samples (Cold)": { @@ -4764,7 +10129,7 @@ }, "value": { "type": "int64", - "value": 955 + "value": "660" } }, "Average CPU Time (Cold)": { @@ -4782,7 +10147,7 @@ }, "value": { "type": "float64", - "value": 0.0002228811518324608 + "value": "0.0007372658136363634" } }, "CPU Relative Standard Deviation (Cold)": { @@ -4800,7 +10165,7 @@ }, "value": { "type": "float64", - "value": 0.025900335416445572 + "value": "0.004348049843468552" } }, "Average GPU Time (Cold)": { @@ -4818,7 +10183,7 @@ }, "value": { "type": "float64", - "value": 0.00021573310965642857 + "value": "0.0007317814296845251" } }, "GPU Relative Standard Deviation (Cold)": { @@ -4836,7 +10201,7 @@ }, "value": { "type": "float64", - "value": 0.02430793263473743 + "value": "0.004351029775591727" } }, "Element Throughput": { @@ -4854,7 +10219,7 @@ }, "value": { "type": "float64", - "value": 1215131049.737726 + "value": "91706158803.36154" } }, "Average Global Memory Throughput": { @@ -4872,7 +10237,7 @@ }, "value": { "type": "float64", - "value": 9721048397.901808 + "value": "458530794016.8077" } }, "Percent Peak Global Memory Throughput": { @@ -4890,13 +10255,49 @@ }, "value": { "type": "float64", - "value": 0.07592670893137503 + "value": "0.5268046806259279" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007299521218782687" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "717" } } }, "is_skipped": false }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20": { + "Device=0 In=I8 Out=F32": { "device": 0, "type_config_index": 3, "min_samples": 10, @@ -4905,24 +10306,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U32" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 + "value": "F32" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -4930,11 +10333,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 4194304 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" } }, "Number of Samples (Cold)": { @@ -4952,7 +10369,7 @@ }, "value": { "type": "int64", - "value": 323 + "value": "656" } }, "Average CPU Time (Cold)": { @@ -4970,7 +10387,7 @@ }, "value": { "type": "float64", - "value": 0.0006567151702786381 + "value": "0.000742387521341463" } }, "CPU Relative Standard Deviation (Cold)": { @@ -4988,7 +10405,7 @@ }, "value": { "type": "float64", - "value": 0.010434977776651313 + "value": "0.0041525675601748364" } }, "Average GPU Time (Cold)": { @@ -5006,7 +10423,7 @@ }, "value": { "type": "float64", - "value": 0.0006496768811538867 + "value": "0.0007369443420775064" } }, "GPU Relative Standard Deviation (Cold)": { @@ -5024,7 +10441,7 @@ }, "value": { "type": "float64", - "value": 0.010470007981899945 + "value": "0.004193469264853706" } }, "Element Throughput": { @@ -5042,7 +10459,7 @@ }, "value": { "type": "float64", - "value": 1613996173.2017174 + "value": "91063680346.35373" } }, "Average Global Memory Throughput": { @@ -5060,7 +10477,7 @@ }, "value": { "type": "float64", - "value": 12911969385.613739 + "value": "455318401731.7686" } }, "Percent Peak Global Memory Throughput": { @@ -5078,795 +10495,49 @@ }, "value": { "type": "float64", - "value": 0.10084954843799784 + "value": "0.5231139725778592" } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 16777216 + "type": "float64", + "value": "0.0007352807822347689" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 83 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0023719566265060247 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.028543008422411302 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0023625457344284987 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02881928350302021 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1775332404.7352695 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14202659237.882156 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11093054266028927 + "value": "714" } } }, "is_skipped": false }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 21 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.009017638095238097 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007349245214913964 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.009007660184587752 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007398716674765135 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1862549836.0502186 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14900398688.401749 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11638026968571723 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.03635022 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.011186633894477735 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0363376838684082 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01114331248801505 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1846811817.8094478 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14774494542.475582 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1153968893907428 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.14715240000000002 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.14714149475097654 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1824335524.4847984 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14594684195.878387 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11399247216225934 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^16": { + "Device=0 In=I8 Out=I64": { "device": 0, "type_config_index": 4, "min_samples": 10, @@ -5875,24 +10546,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U64" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -5900,11 +10573,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 524288 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" } }, "Number of Samples (Cold)": { @@ -5922,7 +10609,7 @@ }, "value": { "type": "int64", - "value": 1308 + "value": "407" } }, "Average CPU Time (Cold)": { @@ -5940,7 +10627,7 @@ }, "value": { "type": "float64", - "value": 0.00021293440366972473 + "value": "0.0012095483882063889" } }, "CPU Relative Standard Deviation (Cold)": { @@ -5958,7 +10645,7 @@ }, "value": { "type": "float64", - "value": 0.06644665882181848 + "value": "0.009732185124544102" } }, "Average GPU Time (Cold)": { @@ -5976,7 +10663,7 @@ }, "value": { "type": "float64", - "value": 0.00020585504468154448 + "value": "0.001204128551248836" } }, "GPU Relative Standard Deviation (Cold)": { @@ -5994,7 +10681,7 @@ }, "value": { "type": "float64", - "value": 0.06352394541054457 + "value": "0.009798212399727946" } }, "Element Throughput": { @@ -6012,7 +10699,7 @@ }, "value": { "type": "float64", - "value": 318359941.5860004 + "value": "55732308589.8092" } }, "Average Global Memory Throughput": { @@ -6030,7 +10717,7 @@ }, "value": { "type": "float64", - "value": 5093759065.376006 + "value": "501590777308.2828" } }, "Percent Peak Global Memory Throughput": { @@ -6048,14 +10735,4308 @@ }, "value": { "type": "float64", - "value": 0.03978504643664089 + "value": "0.576276168782494" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0012017273091491842" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "429" } } }, "is_skipped": false }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^18": { + "Device=0 In=I8 Out=F64": { "device": 0, + "type_config_index": 5, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "415" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0011847366168674703" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011261383409993239" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011792877487389432" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011302242538631406" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "56906267424.351715" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512156406819.16547" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5884149894521662" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0011767830588600852" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "440" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=I8": { + "device": 0, + "type_config_index": 6, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I16 Out=I16": { + "device": 0, + "type_config_index": 7, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=I16 Out=I32": { + "device": 0, + "type_config_index": 8, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1105" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00043142517375565617" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.01116818587784149" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00042600826737028365" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.011332580467569093" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78764743715.25449" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "472588462291.5269" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5429554943606697" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00042359266142467694" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1238" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=F32": { + "device": 0, + "type_config_index": 9, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1102" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00043289838384754937" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008465395678081931" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00042745939692221985" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008617999240612035" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78497354933.81969" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "470984129602.9181" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5411122812533525" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00042536910129233627" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1229" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=I64": { + "device": 0, + "type_config_index": 10, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "734" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006609588569482289" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007896476276327823" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006555628124472239" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007981909890800989" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "51184160179.466095" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "511841601794.66095" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5880533108854101" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006538430490801411" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "806" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I16 Out=F64": { + "device": 0, + "type_config_index": 11, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "734" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006605395899182562" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007740408518735753" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006550883051485072" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.007833851008491804" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "51221234963.72489" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512212349637.2489" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5884792619913246" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006534532250824923" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "805" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I32 Out=I8": { + "device": 0, + "type_config_index": 12, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I32 Out=I16": { + "device": 0, + "type_config_index": 13, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I32 Out=I32": { + "device": 0, + "type_config_index": 14, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=I32 Out=F32": { + "device": 0, + "type_config_index": 15, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1735" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00026702492853025945" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.01324576727299336" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00026161364844278195" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013450268523907918" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "64129742847.37816" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "513037942779.02527" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5894277835236963" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00025957003988639885" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2015" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I32 Out=I64": { + "device": 0, + "type_config_index": 16, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1234" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003841953128038892" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008873245446388355" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003788044850192556" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008932234099031263" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44289908550.44172" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531478902605.3006" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6106145480299869" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037766468619885956" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1381" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I32 Out=F64": { + "device": 0, + "type_config_index": 17, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1235" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003840312064777327" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009389520289783196" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037863498520754796" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009545097161422792" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44309735379.58624" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531716824555.03485" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6108878958582662" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003773968978051128" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1396" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F32 Out=I8": { + "device": 0, + "type_config_index": 18, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F32 Out=I16": { + "device": 0, + "type_config_index": 19, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F32 Out=I32": { + "device": 0, + "type_config_index": 20, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1726" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00026856249884125153" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.01342456387766187" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00026315643022814674" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.013724796519135959" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "63753775598.24316" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "510030204785.94525" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5859722021897349" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002609094005709575" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2047" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F32 Out=F32": { + "device": 0, + "type_config_index": 21, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=F32 Out=I64": { + "device": 0, + "type_config_index": 22, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1235" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003840352834008098" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009209302867708775" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037863381922486526" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009434239106344595" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44309871829.05669" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531718461948.68024" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6108897770550095" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037729541193829834" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1370" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F32 Out=F64": { + "device": 0, + "type_config_index": 23, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1233" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0003844534225466336" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009387088977698597" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00037907109053659035" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009568452852068391" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44258758894.67376" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "531105106736.0851" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6101850950552448" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0003776787067281789" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1392" + } + } + }, + "is_skipped": false + }, + "Device=0 In=I64 Out=I8": { + "device": 0, + "type_config_index": 24, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=I16": { + "device": 0, + "type_config_index": 25, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=I32": { + "device": 0, + "type_config_index": 26, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=F32": { + "device": 0, + "type_config_index": 27, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=I64 Out=I64": { + "device": 0, + "type_config_index": 28, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=0 In=I64 Out=F64": { + "device": 0, + "type_config_index": 29, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1865" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002468652632707771" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008794568336063534" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002414397094508553" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009088437943671243" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "34744110730.913086" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "555905771694.6094" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6386785060829612" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00023926271107803853" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2168" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F64 Out=I8": { + "device": 0, + "type_config_index": 30, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=I16": { + "device": 0, + "type_config_index": 31, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=I32": { + "device": 0, + "type_config_index": 32, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=F32": { + "device": 0, + "type_config_index": 33, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=0 In=F64 Out=I64": { + "device": 0, + "type_config_index": 34, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1861" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002474318479312196" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009416123268532244" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00024199313163148308" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.009609928378243537" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "34664653262.864136" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "554634452205.8262" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6372178908614731" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00024011272523290366" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2177" + } + } + }, + "is_skipped": false + }, + "Device=0 In=F64 Out=F64": { + "device": 0, + "type_config_index": 35, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F64" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I8 Out=I8": { + "device": 1, + "type_config_index": 0, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I8 Out=I16": { + "device": 1, + "type_config_index": 1, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "715" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0006812909104895107" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.029682520209047932" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0006765060471488043" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.029725089166496972" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "99199207875.28265" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "297597623625.84796" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.4064652857651988" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.000659287437142213" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "797" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I8 Out=I32": { + "device": 1, + "type_config_index": 2, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "566" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008641483356890464" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00815440605473416" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008593197461783684" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008178118032486274" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78095335640.14047" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "390476678200.70233" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5333215119655572" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008574365556141886" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "607" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I8 Out=F32": { + "device": 1, + "type_config_index": 3, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I8" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "568" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0008621727816901408" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008348927642653206" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.000857566987334842" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.008407666935430734" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "78254952663.88672" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "391274763319.4336" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.5344115539218662" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0008559337940091401" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "612" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I8 Out=I64": { + "device": 1, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, @@ -6063,24 +15044,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "U64" + "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 + "value": "I64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -6088,11 +15071,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 2097152 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" } }, "Number of Samples (Cold)": { @@ -6110,7 +15107,7 @@ }, "value": { "type": "int64", - "value": 577 + "value": "339" } }, "Average CPU Time (Cold)": { @@ -6128,7 +15125,7 @@ }, "value": { "type": "float64", - "value": 0.0005508495667244364 + "value": "0.0014581254159292036" } }, "CPU Relative Standard Deviation (Cold)": { @@ -6146,7 +15143,7 @@ }, "value": { "type": "float64", - "value": 0.011094070357430255 + "value": "0.005934832249204677" } }, "Average GPU Time (Cold)": { @@ -6164,7 +15161,7 @@ }, "value": { "type": "float64", - "value": 0.0005439041656679245 + "value": "0.001453499562620765" } }, "GPU Relative Standard Deviation (Cold)": { @@ -6182,7 +15179,7 @@ }, "value": { "type": "float64", - "value": 0.01115097584159383 + "value": "0.005963799027107206" } }, "Element Throughput": { @@ -6200,7 +15197,7 @@ }, "value": { "type": "float64", - "value": 481967259.21023655 + "value": "46170542961.153595" } }, "Average Global Memory Throughput": { @@ -6218,7 +15215,7 @@ }, "value": { "type": "float64", - "value": 7711476147.363785 + "value": "415534886650.3824" } }, "Percent Peak Global Memory Throughput": { @@ -6236,826 +15233,50 @@ }, "value": { "type": "float64", - "value": 0.060230849688857356 + "value": "0.5675465562860337" } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 8388608 + "type": "float64", + "value": "0.0014501432381838642" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 180 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0018207355555555549 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009870487088215963 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0018114751981364358 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009874938925190486 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 578851977.150296 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9261631634.404736 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07233841254065183 + "value": "361" } } }, "is_skipped": false }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 46 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.007233730434782609 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02377618510012125 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.007224911980007006 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023868355689650532 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 580533577.6555624 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9288537242.488998 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07254856006692857 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 12 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.02786785833333334 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006751988230344623 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.027856205463409427 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00670616725677252 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 602279302.6149144 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9636468841.83863 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.0752660963027886 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 3 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.11277853333333336 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.11276614379882811 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 595115357.6708314 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9521845722.733303 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.0743708270021034 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 4, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I8 Out=F64": { + "device": 1, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, @@ -7063,24 +15284,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "I8" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "F64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -7088,11 +15311,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 65536 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "536870912" } }, "Number of Samples (Cold)": { @@ -7110,7 +15347,7 @@ }, "value": { "type": "int64", - "value": 2603 + "value": "339" } }, "Average CPU Time (Cold)": { @@ -7128,7 +15365,7 @@ }, "value": { "type": "float64", - "value": 6.0122819823280644e-05 + "value": "0.0014608549616519177" } }, "CPU Relative Standard Deviation (Cold)": { @@ -7146,7 +15383,7 @@ }, "value": { "type": "float64", - "value": 0.2017958130718533 + "value": "0.005454444454530878" } }, "Average GPU Time (Cold)": { @@ -7164,7 +15401,7 @@ }, "value": { "type": "float64", - "value": 5.2423252989947936e-05 + "value": "0.0014561624537527042" } }, "GPU Relative Standard Deviation (Cold)": { @@ -7182,7 +15419,7 @@ }, "value": { "type": "float64", - "value": 0.19022823395202412 + "value": "0.0054738241927221685" } }, "Element Throughput": { @@ -7200,7 +15437,7 @@ }, "value": { "type": "float64", - "value": 1250132265.0192351 + "value": "46086110671.96002" } }, "Average Global Memory Throughput": { @@ -7218,7 +15455,7 @@ }, "value": { "type": "float64", - "value": 2500264530.0384703 + "value": "414774996047.64026" } }, "Percent Peak Global Memory Throughput": { @@ -7236,1330 +15473,50 @@ }, "value": { "type": "float64", - "value": 0.019528434532292475 + "value": "0.5665086812276555" } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 262144 + "type": "float64", + "value": "0.0014524769206623453" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 1398 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.949971387696734e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.11888824393616607 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 8.337345877622417e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023068268156533414 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3144214044.227182 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 6288428088.454364 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.049116065424693545 + "value": "364" } } }, "is_skipped": false }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 481 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00017325010395010385 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0066179185169862416 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00016742034008621916 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00660582250812737 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6263133854.942582 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 12526267709.885164 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.09783700723166992 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 124 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0005489895161290321 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010368472853039202 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0005430583213606187 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010216130794040566 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 7723487211.26541 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15446974422.53082 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12064932534468586 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 30 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0019625433333333338 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007820820087352303 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0019545994718869527 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006915737015092503 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8583454687.933291 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17166909375.866583 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13408295875926787 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0075993375 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006229944971220976 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.007590148031711578 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006228135758381483 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8841575120.751228 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17683150241.502457 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13811508249111518 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.030969200000000002 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.03096171188354492 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8669916476.50672 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17339832953.01344 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1354335865487803 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I8 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 5, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.1339969 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.13392364501953125 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 8017567202.889429 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16035134405.778858 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12524317675095958 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I8": { + "device": 1, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, @@ -8567,1345 +15524,21 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "I16" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 131072 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2279 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 8.156450197454992e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.060115866604561134 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 7.509039476441896e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.030698884823584592 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 872761425.8202536 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 3491045703.2810144 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.02726697781242982 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 524288 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1080 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00015584981481481477 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.1277032897361289 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00014661810500202392 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023857683089802767 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1787937444.672207 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 7151749778.688828 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.055859080375912494 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 388 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.000382848969072165 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0119905873372064 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.000376419794160066 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010115421873716586 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2785655845.5959177 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 11142623382.38367 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08702998767795295 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 107 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0011626495327102806 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.011046847535561077 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0011553172374440123 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010639681594967242 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3630434883.2181773 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14521739532.87271 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1134227344169638 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 26 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004200307692307692 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008829704122151534 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.004190125520412738 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008654100653918605 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4003988882.4971046 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16015955529.988419 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.12509337923322622 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.015656399999999997 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004407923988907438 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.015647593361990793 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004421018196379044 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4288765847.086274 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17155063388.345097 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1339904351126679 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0661545 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06614323425292969 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4058396282.4302044 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16233585129.720818 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1267931855295615 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I16 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 6, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 + "value": "I8" } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I16": { + "device": 1, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, @@ -9913,1345 +15546,21 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "I32" + "value": "I16" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 262144 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1863 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 9.472168545356936e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.1737926133675047 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 8.730608265261114e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.1873449437251008 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 750646438.4705727 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 6005171507.764582 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.046903676485289474 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 919 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0002210763873775844 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.07035645807624645 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00021252275933638198 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.06707277848247957 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1233486713.6986363 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9867893709.58909 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07707365119336643 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 317 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0006475441640378547 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014103444072938905 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0006398196086898584 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.013812890521369204 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1638861931.95476 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 13110895455.63808 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.10240326992969008 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 84 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0023177226190476186 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.019912883301982914 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0023088179202306832 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.020342096350492007 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1816645636.3873556 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14533165091.098845 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11351197428063957 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 21 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.008913814285714286 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006001791310703415 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.008904065495445614 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0060174926866995325 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1884219743.0579844 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15073757944.463875 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11773430036603251 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 6 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.03555846666666667 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.008199160296918712 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.03554372278849283 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00819692333781414 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1888065141.609935 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 15104521132.87948 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11797457770619439 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.1451229 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.14511042785644532 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1849870198.6156194 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 14798961588.924955 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.11558799041587224 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I32 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 7, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 + "value": "I16" } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Not a conversion: InputType == OutputType." }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I32": { + "device": 1, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, @@ -11259,24 +15568,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", - "value": "I64" + "value": "I16" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I32" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -11284,11 +15595,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 524288 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" } }, "Number of Samples (Cold)": { @@ -11306,7 +15631,7 @@ }, "value": { "type": "int64", - "value": 1141 + "value": "1042" } }, "Average CPU Time (Cold)": { @@ -11324,7 +15649,7 @@ }, "value": { "type": "float64", - "value": 0.0002058517966695883 + "value": "0.00046152389539347375" } }, "CPU Relative Standard Deviation (Cold)": { @@ -11342,7 +15667,7 @@ }, "value": { "type": "float64", - "value": 0.11032147567669195 + "value": "0.007516961198942111" } }, "Average GPU Time (Cold)": { @@ -11360,7 +15685,7 @@ }, "value": { "type": "float64", - "value": 0.00019295587745233125 + "value": "0.0004568425950928514" } }, "GPU Relative Standard Deviation (Cold)": { @@ -11378,7 +15703,7 @@ }, "value": { "type": "float64", - "value": 0.0891999662438443 + "value": "0.0075614567935713" } }, "Element Throughput": { @@ -11396,7 +15721,7 @@ }, "value": { "type": "float64", - "value": 339642413.93056464 + "value": "73448562722.52853" } }, "Average Global Memory Throughput": { @@ -11414,7 +15739,7 @@ }, "value": { "type": "float64", - "value": 5434278622.889034 + "value": "440691376335.17114" } }, "Percent Peak Global Memory Throughput": { @@ -11432,1014 +15757,50 @@ }, "value": { "type": "float64", - "value": 0.04244469056867841 + "value": "0.6019058352479938" } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 2097152 + "type": "float64", + "value": "0.00045486935942230756" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 530 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0005289122641509438 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.33053905464430366 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0005132295253704184 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02690586292503542 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 510773420.1589437 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 8172374722.543099 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.06383071983990798 + "value": "1156" } } }, "is_skipped": false }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 177 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0017896672316384183 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.015371140659983133 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.001780063275563514 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.015018523735211514 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 589066700.2655019 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9425067204.24803 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.0736149337997378 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 47 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.006963521276595745 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.021866700638764357 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.006949177173857993 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.021990225740270965 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 603568436.2428534 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9657094979.885654 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07542719773092395 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 12 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.027483341666666675 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009214836696393284 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.027467103958129877 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.009243931693158285 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 610811246.2666156 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9772979940.26585 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07633232270265129 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 3 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.11026343333333334 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.11025037638346354 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 608695101.1086586 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9739121617.738537 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07606787067091458 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=I64 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 8, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "I64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=F32": { + "device": 1, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, @@ -12447,24 +15808,26 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { "type": "string", "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -12472,11 +15835,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 262144 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" } }, "Number of Samples (Cold)": { @@ -12494,7 +15871,7 @@ }, "value": { "type": "int64", - "value": 1764 + "value": "1047" } }, "Average CPU Time (Cold)": { @@ -12512,7 +15889,7 @@ }, "value": { "type": "float64", - "value": 9.839484126984118e-05 + "value": "0.00045967601432664773" } }, "CPU Relative Standard Deviation (Cold)": { @@ -12530,7 +15907,7 @@ }, "value": { "type": "float64", - "value": 0.2859180404938184 + "value": "0.007580415029008197" } }, "Average GPU Time (Cold)": { @@ -12548,7 +15925,7 @@ }, "value": { "type": "float64", - "value": 9.14749023493997e-05 + "value": "0.00045502618507418957" } }, "GPU Relative Standard Deviation (Cold)": { @@ -12566,7 +15943,7 @@ }, "value": { "type": "float64", - "value": 0.3060250465322957 + "value": "0.007602410404504316" } }, "Element Throughput": { @@ -12584,7 +15961,7 @@ }, "value": { "type": "float64", - "value": 716436949.5545034 + "value": "73741760585.77625" } }, "Average Global Memory Throughput": { @@ -12602,7 +15979,7 @@ }, "value": { "type": "float64", - "value": 5731495596.436028 + "value": "442450563514.6575" } }, "Percent Peak Global Memory Throughput": { @@ -12620,1172 +15997,50 @@ }, "value": { "type": "float64", - "value": 0.04476611781770204 + "value": "0.6043085712339618" } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 - } - }, - "summaries": { - "Input Buffer Size: ": { + "Average GPU Time (Batch)": { "hint": { "type": "string", - "value": "bytes" + "value": "duration" }, "short_name": { "type": "string", - "value": "Size" + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { - "type": "int64", - "value": 1048576 + "type": "float64", + "value": "0.0004530724069916505" } }, - "Number of Samples (Cold)": { + "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", - "value": "Samples" + "value": "Batch" }, "description": { "type": "string", - "value": "Number of kernel executions in cold time measurements." + "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", - "value": 1059 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0001759974504249292 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.06830900199777254 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00016901520620662156 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0683105078072142 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1551008373.0544827 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 12408066984.435862 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.09691379486718837 + "value": "1164" } } }, "is_skipped": false }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 360 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0004911733333333335 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.021226057185570184 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00048378142292300816 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.021253466020927158 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2167458174.942936 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17339665399.543488 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13543227786446738 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 96 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0017458687499999996 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.003986863328356874 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0017372319946686425 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0034632504917798987 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2414360323.130024 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19314882585.04019 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15085980524431541 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 24 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.006740016666666665 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0016710482080877498 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.006730861365795134 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0016728064092106185 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2492580828.54869 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19940646628.38952 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15574736494305735 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 6 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.026749483333333338 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.000816878555365262 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.026738741238911946 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0007800612715292246 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2509798924.353957 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20078391394.831657 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15682322696538098 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10676805 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10674793624877929 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2514666469.7517247 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20117331758.013798 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1571273725163537 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F32 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 9, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^16": { - "device": 0, + "Device=1 In=I16 Out=I64": { + "device": 1, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, @@ -13793,776 +16048,2516 @@ "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "648" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007539600570987655" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005701338376763893" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.000749293333218421" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005730659247124155" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44781436738.365845" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "447814367383.65845" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6116345708365091" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007462590063859665" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "701" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I16 Out=F64": { + "device": 1, + "type_config_index": 11, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I16" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "33554432" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "268435456" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "650" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0007515365646153841" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005320261152122883" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007468673968315132" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.00533121216008688" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "44926893505.259796" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "449268935052.59796" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6136212508913325" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0007440289011028757" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "702" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I32 Out=I8": { + "device": 1, + "type_config_index": 12, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I32 Out=I16": { + "device": 1, + "type_config_index": 13, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I32 Out=I32": { + "device": 1, + "type_config_index": 14, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I32 Out=F32": { + "device": 1, + "type_config_index": 15, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1688" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00027765218187203764" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005690620491369388" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00027302053109941316" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.005713997774637474" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "61450382256.75059" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "491603058054.0047" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6714421138193901" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00027140032503120137" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1928" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I32 Out=I64": { + "device": 1, + "type_config_index": 16, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1134" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.000422905379188712" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004818481737573335" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004182333121013812" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004829428135064118" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40114489961.844894" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "481373879542.13873" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6574708800564614" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004160488643510754" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1267" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I32 Out=F64": { + "device": 1, + "type_config_index": 17, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1132" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004233320008833917" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004685003714910728" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041865130761381596" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004676709118042214" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40074438309.11453" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "480893259709.37445" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6568144390698405" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041636213471617884" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1264" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F32 Out=I8": { + "device": 1, + "type_config_index": 18, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=F32 Out=I16": { + "device": 1, + "type_config_index": 19, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=F32 Out=I32": { + "device": 1, + "type_config_index": 20, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1665" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002817099831831833" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.012603278274487326" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002770048382224978" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.0127786417628205" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "60566508901.63906" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "484532071213.1125" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6617844067049723" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0002751834324989535" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1941" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F32 Out=F32": { + "device": 1, + "type_config_index": 21, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=F32 Out=I64": { + "device": 1, + "type_config_index": 22, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1133" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0004230943777581643" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004719817832949844" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041844157444515244" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004751688895767683" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40094524599.393234" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "481134295192.7188" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.657143650558237" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.0004160357588015425" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1252" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F32 Out=F64": { + "device": 1, + "type_config_index": 23, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "F32" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "16777216" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "134217728" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1132" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.00042342536395759757" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004748798224952708" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041871643782504436" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004750041166889743" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "40068204838.45002" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "480818458061.40027" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6567122733574632" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00041632065453087554" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "1252" + } + } + }, + "is_skipped": false + }, + "Device=1 In=I64 Out=I8": { + "device": 1, + "type_config_index": 24, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I8" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=I16": { + "device": 1, + "type_config_index": 25, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I16" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=I32": { + "device": 1, + "type_config_index": 26, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=F32": { + "device": 1, + "type_config_index": 27, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F32" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." + }, + "Device=1 In=I64 Out=I64": { + "device": 1, + "type_config_index": 28, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "I64" + } + }, + "summaries": null, + "is_skipped": true, + "skip_reason": "Not a conversion: InputType == OutputType." + }, + "Device=1 In=I64 Out=F64": { + "device": 1, + "type_config_index": 29, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { + "type": "string", + "value": "I64" + }, + "Out": { + "type": "string", + "value": "F64" + } + }, + "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, + "Input Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "InSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" + } + }, + "Number of Samples (Cold)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Samples" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in cold time measurements." + }, + "value": { + "type": "int64", + "value": "1753" + } + }, + "Average CPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "CPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time observed from host." + }, + "value": { + "type": "float64", + "value": "0.0002666450433542495" + } + }, + "CPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold CPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004046628770937376" + } + }, + "Average GPU Time (Cold)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "GPU Time" + }, + "description": { + "type": "string", + "value": "Average isolated kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00026198611749762206" + } + }, + "GPU Relative Standard Deviation (Cold)": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "Noise" + }, + "description": { + "type": "string", + "value": "Relative standard deviation of the cold GPU execution time measurements." + }, + "value": { + "type": "float64", + "value": "0.004009600477982423" + } + }, + "Element Throughput": { + "hint": { + "type": "string", + "value": "item_rate" + }, + "short_name": { + "type": "string", + "value": "Elem/s" + }, + "description": { + "type": "string", + "value": "Number of input elements handled per second." + }, + "value": { + "type": "float64", + "value": "32019284380.88381" + } + }, + "Average Global Memory Throughput": { + "hint": { + "type": "string", + "value": "byte_rate" + }, + "short_name": { + "type": "string", + "value": "GlobalMem BW" + }, + "description": { + "type": "string", + "value": "Number of bytes read/written per second to the CUDA device's global memory." + }, + "value": { + "type": "float64", + "value": "512308550094.1409" + } + }, + "Percent Peak Global Memory Throughput": { + "hint": { + "type": "string", + "value": "percentage" + }, + "short_name": { + "type": "string", + "value": "BWPeak" + }, + "description": { + "type": "string", + "value": "Global device memory throughput as a percentage of the device's peak bandwidth." + }, + "value": { + "type": "float64", + "value": "0.6997221237081251" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00026007244216493403" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2008" + } + } + }, + "is_skipped": false + }, + "Device=1 In=F64 Out=I8": { + "device": 1, + "type_config_index": 30, + "min_samples": 10, + "min_time": 0.5, + "max_noise": 0.005, + "skip_time": -1.0, + "timeout": 0.5, + "axis_values": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 65536 + "value": "I8" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 524288 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1336 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00016353682634730543 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.11022562313886396 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00015515176088599353 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.10908954362964919 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 422399330.9889422 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 6758389295.823075 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.052786719693694355 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^18": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=I16": { + "device": 1, + "type_config_index": 31, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 262144 + "value": "I16" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 587 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0004996207836456557 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.024762621528400496 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0004879914401844776 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.01040682032664116 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 537189750.5023869 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 8595036008.03819 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.06713193582884115 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=I32": { + "device": 1, + "type_config_index": 32, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 + "value": "I32" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 188 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0017351010638297873 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.012349562772288485 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0017246832353003474 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004554329966154549 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 607981789.6631866 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9727708634.610985 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07597872902564191 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=F32": { + "device": 1, + "type_config_index": 33, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 + "value": "F32" } }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 50 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0066887959999999995 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0030118761449178105 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.006673763227462767 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0026885930302360814 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 628476596.6434491 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10055625546.295185 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07853993959553225 - } - } - }, - "is_skipped": false + "summaries": null, + "is_skipped": true, + "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=I64": { + "device": 1, + "type_config_index": 34, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { + "Out": { "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 + "value": "I64" } }, "summaries": { + "Element count: Items": { + "short_name": { + "type": "string", + "value": "Items" + }, + "value": { + "type": "int64", + "value": "8388608" + } + }, "Input Buffer Size: ": { "hint": { "type": "string", @@ -14570,11 +18565,25 @@ }, "short_name": { "type": "string", - "value": "Size" + "value": "InSize" }, "value": { "type": "int64", - "value": 134217728 + "value": "67108864" + } + }, + "Output Buffer Size: ": { + "hint": { + "type": "string", + "value": "bytes" + }, + "short_name": { + "type": "string", + "value": "OutSize" + }, + "value": { + "type": "int64", + "value": "67108864" } }, "Number of Samples (Cold)": { @@ -14592,7 +18601,7 @@ }, "value": { "type": "int64", - "value": 13 + "value": "1753" } }, "Average CPU Time (Cold)": { @@ -14610,7 +18619,7 @@ }, "value": { "type": "float64", - "value": 0.026423876923076926 + "value": "0.00026657142213348556" } }, "CPU Relative Standard Deviation (Cold)": { @@ -14628,7 +18637,7 @@ }, "value": { "type": "float64", - "value": 0.0007777926505595894 + "value": "0.004288873685096382" } }, "Average GPU Time (Cold)": { @@ -14646,7 +18655,7 @@ }, "value": { "type": "float64", - "value": 0.026412524149968072 + "value": "0.0002619141041552483" } }, "GPU Relative Standard Deviation (Cold)": { @@ -14664,7 +18673,7 @@ }, "value": { "type": "float64", - "value": 0.0007825372143057571 + "value": "0.00422427515777647" } }, "Element Throughput": { @@ -14682,7 +18691,7 @@ }, "value": { "type": "float64", - "value": 635199267.7693503 + "value": "32028088090.39048" } }, "Average Global Memory Throughput": { @@ -14700,7 +18709,7 @@ }, "value": { "type": "float64", - "value": 10163188284.309605 + "value": "512449409446.2477" } }, "Percent Peak Global Memory Throughput": { @@ -14718,8326 +18727,69 @@ }, "value": { "type": "float64", - "value": 0.07938006345530496 + "value": "0.6999145124648269" + } + }, + "Average GPU Time (Batch)": { + "hint": { + "type": "string", + "value": "duration" + }, + "short_name": { + "type": "string", + "value": "Batch GPU" + }, + "description": { + "type": "string", + "value": "Average back-to-back kernel execution time as measured by CUDA events." + }, + "value": { + "type": "float64", + "value": "0.00026007216520352087" + } + }, + "Number of Samples (Batch)": { + "hint": { + "type": "string", + "value": "sample_size" + }, + "short_name": { + "type": "string", + "value": "Batch" + }, + "description": { + "type": "string", + "value": "Number of kernel executions in hot time measurements." + }, + "value": { + "type": "int64", + "value": "2013" } } }, "is_skipped": false }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 10, + "Device=1 In=F64 Out=F64": { + "device": 1, + "type_config_index": 35, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { - "Key": { + "In": { "type": "string", "value": "F64" }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 4 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10656022500000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10654843139648437 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 629843753.8726102 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10077500061.961763 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07871079153619223 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 10, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { + "Out": { "type": "string", "value": "F64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 } }, "summaries": null, "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=F64 Input=Rand Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 10, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "F64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - } - } - }, - { - "index": 1, - "name": "cub::DeviceRadixSort::SortKeys - Constant Values", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "devices": [ - 0 - ], - "axes": { - "Key": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "U8", - "description": "uint8_t", - "is_active": true - }, - { - "input_string": "U16", - "description": "uint16_t", - "is_active": true - }, - { - "input_string": "U32", - "description": "uint32_t", - "is_active": true - }, - { - "input_string": "U64", - "description": "uint64_t", - "is_active": true - } - ] - }, - "Input": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Const", - "description": "All values = 42", - "is_active": true - } - ] - }, - "Pattern": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Ascend", - "description": "", - "is_active": true - } - ] - }, - "Elements": { - "type": "int64", - "flags": "pow2", - "values": [ - { - "input_string": "20", - "description": "2^20 = 1048576", - "value": 1048576 - }, - { - "input_string": "22", - "description": "2^22 = 4194304", - "value": 4194304 - }, - { - "input_string": "24", - "description": "2^24 = 16777216", - "value": 16777216 - }, - { - "input_string": "26", - "description": "2^26 = 67108864", - "value": 67108864 - }, - { - "input_string": "28", - "description": "2^28 = 268435456", - "value": 268435456 - }, - { - "input_string": "30", - "description": "2^30 = 1073741824", - "value": 1073741824 - } - ] - } - }, - "states": { - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2131 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 9.847855466916956e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.1166410494531917 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 8.792886719743954e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02950844558658552 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 11925275889.7198 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 23850551779.4396 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1862858643107942 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1103 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00030513427017225814 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03970046347053595 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.000298017248173573 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.010246113439079385 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 14074031035.804775 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28148062071.60955 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21985177199145176 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 364 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0011047906593406591 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004165150297152156 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.001098251174111943 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0041277328508611206 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15276301446.767155 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 30552602893.53431 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.23863255196774485 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 99 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004277376767676769 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007146089620860907 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.004269709249939582 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007192485269555904 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15717431813.641554 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31434863627.283108 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24552349121534545 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 26 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.016944819230769233 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004125824124512113 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.016930290442246653 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004190066574775472 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15855336735.993914 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31710673471.987827 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24767771707063724 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0677445 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004186949546309279 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06773285457066126 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004172460379707014 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15852599610.72267 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31705199221.44534 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24763496017749734 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1317 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00024883545937737277 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014216253160288001 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00024280052241178212 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.014413629746702477 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4318672750.718583 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17274691002.874332 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13492479226189025 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 479 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0008450308977035485 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004254901627868165 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0008386715661757668 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004159295165978451 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5001128175.986078 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20004512703.944313 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15624619395107717 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 139 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.003159730215827337 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0012442254198139842 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00315222075688753 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0011064757684230242 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5322348050.447346 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21289392201.789383 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1662818061249483 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 36 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.012422872222222228 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0023351107937830437 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.012409479114744399 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0016255550269386443 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5407871142.654505 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21631484570.61802 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1689537347742597 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 10 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.049459970000000006 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0006367677998496455 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.04944267120361327 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0006514143080448675 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5429226404.344892 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21716905617.379566 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.16962091990580141 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 781 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0004726912932138284 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.013777372000949644 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0004643977672510366 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.005762605261042134 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2257926445.6996794 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18063411565.597435 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14108513157333663 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 250 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0017142272 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006995479034955076 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0017067146277427662 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007040239153063608 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2457530937.9912105 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19660247503.929684 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15355729430087545 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 68 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.006638291176470588 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007984223254281496 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.006619076686746933 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0014220730690780873 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2534676178.2639914 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20277409426.11193 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.15837766672481826 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 18 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.02635283888888889 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0019931714593177012 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.026339953634474014 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0020022842651574563 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2547797347.379048 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20382378779.032383 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1591975348274836 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10489146 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0007390240134400786 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10486280975341797 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00048311871799088955 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 2559872815.07351 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 20478982520.58808 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1599520629263628 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^20": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 257 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0017021642023346295 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.004191102519162322 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0016938885023621748 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.002849428031422373 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 619034841.1585128 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 9904557458.536205 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07736001514102885 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^22": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 70 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.006613181428571428 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00494400837539835 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.006595767314093454 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0035342650950247294 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 635908424.3372039 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10174534789.395262 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.07946868587068281 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^24": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 19 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.026098378947368425 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0013667184743594541 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.026086989252190843 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0013380879892475642 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 643125806.4244041 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10290012902.790466 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08037063314476432 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^26": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.10427278 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.000645711511791114 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.10425362091064454 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.000694228812251099 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 643707752.4388223 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 10299324039.021156 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.08044335821529897 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^28": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Const Pattern=Ascend Elements=2^30": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Const" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - } - } - }, - { - "index": 2, - "name": "cub::DeviceRadixSort::SortKeys - Half Word", - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "devices": [ - 0 - ], - "axes": { - "Key": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "U8", - "description": "uint8_t", - "is_active": true - }, - { - "input_string": "U16", - "description": "uint16_t", - "is_active": true - }, - { - "input_string": "U32", - "description": "uint32_t", - "is_active": true - }, - { - "input_string": "U64", - "description": "uint64_t", - "is_active": true - } - ] - }, - "Input": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Rand", - "description": "Random values uniformly distributed across `T`'s value range", - "is_active": true - } - ] - }, - "Pattern": { - "type": "type", - "flags": "", - "values": [ - { - "input_string": "Ascend", - "description": "", - "is_active": true - } - ] - }, - "Elements": { - "type": "int64", - "flags": "pow2", - "values": [ - { - "input_string": "20", - "description": "2^20 = 1048576", - "value": 1048576 - }, - { - "input_string": "22", - "description": "2^22 = 4194304", - "value": 4194304 - }, - { - "input_string": "24", - "description": "2^24 = 16777216", - "value": 16777216 - }, - { - "input_string": "26", - "description": "2^26 = 67108864", - "value": 67108864 - }, - { - "input_string": "28", - "description": "2^28 = 268435456", - "value": 268435456 - }, - { - "input_string": "30", - "description": "2^30 = 1073741824", - "value": 1073741824 - } - ] - }, - "Bits": { - "type": "string", - "flags": "", - "values": [ - { - "input_string": "Half", - "description": "", - "value": "Half" - } - ] - } - }, - "states": { - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1048576 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 506 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 9.122292490118582e-05 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.14210402239874367 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 8.171724868209468e-05 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.042136506336878675 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 12831758495.434473 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 25663516990.868946 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.20044611496242304 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 131 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0002768381679389314 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.040993814974847456 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00026943584525858166 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04165661510853529 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 15566985884.801865 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 31133971769.60373 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.24317336110975168 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 33 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0009524848484848485 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0029631519174044907 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0009448620568622242 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0027350116544227862 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 17756259634.041355 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 35512519268.08271 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.27737221372846405 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 8 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0037811125 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.017870484219602495 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0037547920346260076 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.011926878925089247 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 17872857772.45032 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 35745715544.90064 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2791936042934629 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.014637200000000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.014622128009796141 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 18358166186.218643 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 36716332372.43729 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2867746529964172 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U8 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 0, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U8" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 1 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0650038 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.06499456024169922 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 16520487560.91296 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 33040975121.82592 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.2580681011139865 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 2097152 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 451 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0002551281596452328 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.02001747605537379 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0002489812859476538 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.020267049724350405 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 4211465114.7735424 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 16845860459.09417 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.13157539098892596 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 116 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0007822155172413793 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023070481778936412 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.000775460692315266 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023745726673375194 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 5408789951.01249 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 21635159804.04996 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.16898244035905055 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 29 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0027147862068965514 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.006919240505320664 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0027074648265180915 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.00697027260615485 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6196651507.962958 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 24786606031.851833 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.19359696038374652 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.010597285714285715 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007928262423253452 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.010588553019932338 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.007853809341114676 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6337869194.560527 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 25351476778.242107 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.19800891010249083 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.04330945 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.04329145622253418 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 6200656651.976361 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 24802626607.905445 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.19372208985179834 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U16 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 1, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U16" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 4194304 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 393 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.00035548193384223926 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03038537754729681 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00034845801535756846 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.030622610260206715 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3009188923.159104 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 24073511385.27283 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.18802730087222594 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 16777216 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 102 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0012527852941176467 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.03836046238706917 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.001244852706497791 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.0384242586382674 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3369317492.830179 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 26954539942.641434 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21052971087416766 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 67108864 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 26 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.004759565384615385 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.012430636692892466 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.00474302521118751 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.011928461924356468 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3537239473.327508 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28297915786.620064 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.22102221153008672 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 268435456 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 7 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.019093514285714284 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.013031432629676501 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.01906842068263463 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.013252538341278812 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3519371903.784103 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28154975230.272823 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21990576754462027 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 1073741824 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 2 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.07657910000000001 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0765664176940918 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": null - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 3505916354.510519 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 28047330836.084152 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.21906500590543107 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U32 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 2, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U32" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^20 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1048576 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 8388608 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 270 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0009398844444444449 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023251730900045106 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0009325108146225963 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.023261920514979124 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1124465243.252302 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 17991443892.03683 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14052302465037514 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^22 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 4194304 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 33554432 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 68 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.0036767867647058824 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.04207676223538282 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.0036685091747957112 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.042174363328430726 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1143326566.7745178 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18293225068.392284 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14288010082160932 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^24 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 16777216 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 134217728 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 18 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.014109038888888889 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.002421042106667527 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.014098197301228839 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.002451783799133172 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1190025621.1152365 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 19040409937.843784 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.1487160236334962 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^26 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 67108864 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": { - "Input Buffer Size: ": { - "hint": { - "type": "string", - "value": "bytes" - }, - "short_name": { - "type": "string", - "value": "Size" - }, - "value": { - "type": "int64", - "value": 536870912 - } - }, - "Number of Samples (Cold)": { - "hint": { - "type": "string", - "value": "sample_size" - }, - "short_name": { - "type": "string", - "value": "Samples" - }, - "description": { - "type": "string", - "value": "Number of kernel executions in cold time measurements." - }, - "value": { - "type": "int64", - "value": 5 - } - }, - "Average CPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "CPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time observed from host." - }, - "value": { - "type": "float64", - "value": 0.056853980000000005 - } - }, - "CPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold CPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.013346033114121256 - } - }, - "Average GPU Time (Cold)": { - "hint": { - "type": "string", - "value": "duration" - }, - "short_name": { - "type": "string", - "value": "GPU Time" - }, - "description": { - "type": "string", - "value": "Average isolated kernel execution time as measured by CUDA events." - }, - "value": { - "type": "float64", - "value": 0.05684054412841797 - } - }, - "GPU Relative Standard Deviation (Cold)": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "Noise" - }, - "description": { - "type": "string", - "value": "Relative standard deviation of the cold GPU execution time measurements." - }, - "value": { - "type": "float64", - "value": 0.013346516895045969 - } - }, - "Element Throughput": { - "hint": { - "type": "string", - "value": "item_rate" - }, - "short_name": { - "type": "string", - "value": "Elem/s" - }, - "description": { - "type": "string", - "value": "Number of input elements handled per second." - }, - "value": { - "type": "float64", - "value": 1180651329.5928898 - } - }, - "Average Global Memory Throughput": { - "hint": { - "type": "string", - "value": "byte_rate" - }, - "short_name": { - "type": "string", - "value": "GlobalMem BW" - }, - "description": { - "type": "string", - "value": "Number of bytes read/written per second to the CUDA device's global memory." - }, - "value": { - "type": "float64", - "value": 18890421273.486237 - } - }, - "Percent Peak Global Memory Throughput": { - "hint": { - "type": "string", - "value": "percentage" - }, - "short_name": { - "type": "string", - "value": "BWPeak" - }, - "description": { - "type": "string", - "value": "Global device memory throughput as a percentage of the device's peak bandwidth." - }, - "value": { - "type": "float64", - "value": 0.14754453006659457 - } - } - }, - "is_skipped": false - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^28 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 268435456 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" - }, - "Device=0 Key=U64 Input=Rand Pattern=Ascend Elements=2^30 Bits=Half": { - "device": 0, - "type_config_index": 3, - "min_samples": 10, - "min_time": 0.5, - "max_noise": 0.005, - "skip_time": -1.0, - "timeout": 0.5, - "axis_values": { - "Key": { - "type": "string", - "value": "U64" - }, - "Input": { - "type": "string", - "value": "Rand" - }, - "Pattern": { - "type": "string", - "value": "Ascend" - }, - "Elements": { - "type": "int64", - "value": 1073741824 - }, - "Bits": { - "type": "string", - "value": "Half" - } - }, - "summaries": null, - "is_skipped": true, - "skip_reason": "Unexpected error: bad allocation: cudaErrorMemoryAllocation: out of memory" + "skip_reason": "Not a conversion: InputType == OutputType." } } }