{ "devices": [ { "id": 0, "name": "NVIDIA Quadro GV100", "sm_version": 700, "ptx_version": 700, "sm_default_clock_rate": 1627000000, "number_of_sms": 80, "max_blocks_per_sm": 32, "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, "global_memory_size": 34078982144, "global_memory_bus_peak_clock_rate": 850000000, "global_memory_bus_width": 4096, "global_memory_bus_bandwidth": 870400000000, "l2_cache_size": 6291456, "shared_memory_per_sm": 98304, "shared_memory_per_block": 49152, "ecc_state": false }, { "id": 1, "name": "NVIDIA Quadro GP100", "sm_version": 600, "ptx_version": 600, "sm_default_clock_rate": 1442500000, "number_of_sms": 56, "max_blocks_per_sm": 32, "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, "global_memory_size": 17069309952, "global_memory_bus_peak_clock_rate": 715000000, "global_memory_bus_width": 4096, "global_memory_bus_bandwidth": 732160000000, "l2_cache_size": 4194304, "shared_memory_per_sm": 65536, "shared_memory_per_block": 49152, "ecc_state": false } ], "benchmarks": [ { "index": 0, "name": "simple", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0, 1 ], "axes": null, "states": { "Device=0": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": null, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "486" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010095795164609047" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0006114730449640358" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010034803637751827" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005535128658782786" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.001001473929135854" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "524" } } }, "is_skipped": false }, "Device=1": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": null, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "488" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010075532745901644" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005196761038903798" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010027413077530309" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0003559489414701089" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010014738126565483" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "524" } } }, "is_skipped": false } } }, { "index": 1, "name": "single_float64_axis", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0, 1 ], "axes": { "Duration": { "type": "float64", "flags": "", "values": [ { "input_string": "0", "description": "", "value": 0.0 }, { "input_string": "0.0001", "description": "", "value": 0.0001 }, { "input_string": "0.0002", "description": "", "value": 0.0002 }, { "input_string": "0.0003", "description": "", "value": 0.00030000000000000003 }, { "input_string": "0.0004", "description": "", "value": 0.0004 }, { "input_string": "0.0005", "description": "", "value": 0.0005 }, { "input_string": "0.0006", "description": "", "value": 0.0006000000000000001 }, { "input_string": "0.0007", "description": "", "value": 0.0007000000000000001 }, { "input_string": "0.0008", "description": "", "value": 0.0008000000000000001 }, { "input_string": "0.0009", "description": "", "value": 0.0009000000000000002 }, { "input_string": "0.001", "description": "", "value": 0.0010000000000000002 } ] } }, "states": { "Device=0 Duration=0": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "14050" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "9.162447829181515e-06" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.03536831341378678" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "3.7685477789450405e-06" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.1242757507930245" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "1.6396544558213103e-06" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "305626" } } }, "is_skipped": false }, "Device=0 Duration=0.0001": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "3833" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00010862307644142961" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.003939155614687134" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00010305688671520844" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.004761970040891668" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00010137620362095862" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "5088" } } }, "is_skipped": false }, "Device=0 Duration=0.0002": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0002" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "2173" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0002089772070869765" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0021045460135476644" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00020339080587790604" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0024397165593270475" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00020172840121363044" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "2582" } } }, "is_skipped": false }, "Device=0 Duration=0.0003": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.00030000000000000003" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1519" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00030826679394338436" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0014114649766999914" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0003027125938382783" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0016421113166888573" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0003010571695496376" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1742" } } }, "is_skipped": false }, "Device=0 Duration=0.0004": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0004" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1166" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004085880488850769" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0010128960046765418" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004030490282469304" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0012417175292930155" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004014095938278854" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1304" } } }, "is_skipped": false }, "Device=0 Duration=0.0005": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0005" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "945" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0005090367534391529" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0008945139151387666" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0005034566609317042" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0009551336090877046" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0005017609577982818" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1044" } } }, "is_skipped": false }, "Device=0 Duration=0.0006": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0006000000000000001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "796" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0006083192776381908" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0006965661259016556" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.000602732541573109" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0008277718240662281" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006010903174724053" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "872" } } }, "is_skipped": false }, "Device=0 Duration=0.0007": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0007000000000000001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "686" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0007086454693877553" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005852496456482882" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007030655056151287" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0007134410597090336" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007014413925415692" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "748" } } }, "is_skipped": false }, "Device=0 Duration=0.0008": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0008000000000000001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "603" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0008089611791044773" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005396538572101352" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0008033946325529826" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0006279991294953942" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0008017935563300363" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "654" } } }, "is_skipped": false }, "Device=0 Duration=0.0009": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0009000000000000002" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "539" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0009083643543599264" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.00046818252995143266" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009027890649266406" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005476172787827472" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009011217884181701" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "582" } } }, "is_skipped": false }, "Device=0 Duration=0.001": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0010000000000000002" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "486" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010087072057613155" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.00041872799651378016" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010031157275776806" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0004937944200398705" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.001001475909284053" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "524" } } }, "is_skipped": false }, "Device=1 Duration=0": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "14964" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "8.23057591553059e-06" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.050523894758860086" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "3.353875434461126e-06" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.05725803858596207" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "1.355632943746511e-06" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "368832" } } }, "is_skipped": false }, "Device=1 Duration=0.0001": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "3942" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.000107132078640284" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0038634259631888097" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0001024897333569082" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.003060119065396588" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0001013761587297066" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "5074" } } }, "is_skipped": false }, "Device=1 Duration=0.0002": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0002" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "2208" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0002075046254528986" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.001976178510512602" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00020285036215099666" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0015468676288172283" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00020172864733863198" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "2595" } } }, "is_skipped": false }, "Device=1 Duration=0.0003": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.00030000000000000003" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1537" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0003067784710474956" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0013859894180016342" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0003021601552535132" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0010334107287380953" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0003010565582609204" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1737" } } }, "is_skipped": false }, "Device=1 Duration=0.0004": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0004" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1176" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004071361462585035" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0010022100665278571" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00040249948315068836" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0007542402160340061" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004014089213396104" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1306" } } }, "is_skipped": false }, "Device=1 Duration=0.0005": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0005" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "951" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0005075497234490012" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0007999383507906208" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0005028805712670812" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005997416033360081" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0005017610334757409" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1046" } } }, "is_skipped": false }, "Device=1 Duration=0.0006": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0006000000000000001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "800" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0006068307462500002" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.000718570216952097" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006021880812197924" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005176533625753945" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006010893901462271" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "873" } } }, "is_skipped": false }, "Device=1 Duration=0.0007": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0007000000000000001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "690" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0007071831971014495" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005757596123903934" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007025585162466849" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.00045240029617132243" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007014411477481618" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "748" } } }, "is_skipped": false }, "Device=1 Duration=0.0008": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0008000000000000001" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "606" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0008075822128712869" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0005708387835625503" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0008029147460319033" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0004053734463924786" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00080179443359375" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "655" } } }, "is_skipped": false }, "Device=1 Duration=0.0009": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0009000000000000002" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "541" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0009068751312384467" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0004621968439524866" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009022267493875558" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0003405832639595575" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009011221046513186" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "583" } } }, "is_skipped": false }, "Device=1 Duration=0.001": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "Duration": { "type": "float64", "value": "0.0010000000000000002" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "488" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010072327028688517" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0004212022611220011" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010025862904845687" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0003195925477697865" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010014755598461355" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "524" } } }, "is_skipped": false } } }, { "index": 2, "name": "copy_sweep_grid_shape", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0, 1 ], "axes": { "BlockSize": { "type": "int64", "flags": "pow2", "values": [ { "input_string": "6", "description": "2^6 = 64", "value": 64 }, { "input_string": "8", "description": "2^8 = 256", "value": 256 }, { "input_string": "10", "description": "2^10 = 1024", "value": 1024 } ] }, "NumBlocks": { "type": "int64", "flags": "pow2", "values": [ { "input_string": "6", "description": "2^6 = 64", "value": 64 }, { "input_string": "8", "description": "2^8 = 256", "value": 256 }, { "input_string": "10", "description": "2^10 = 1024", "value": 1024 } ] } }, "states": { "Device=0 BlockSize=2^6 NumBlocks=2^6": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "64" }, "NumBlocks": { "type": "int64", "value": "64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "70" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.007152438914285718" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.05103516144522029" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.007146643659046716" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.05105065000806433" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "9390263066.362482" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "75122104530.89986" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.08630756494818458" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.006479399461012621" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "78" } } }, "is_skipped": false }, "Device=0 BlockSize=2^8 NumBlocks=2^6": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "256" }, "NumBlocks": { "type": "int64", "value": "64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "229" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.002168227908296944" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0073699479168641575" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0021626523655054347" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.007425464144896749" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "31030814323.371826" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "248246514586.9746" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.28520969047216754" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.002159405241287294" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "243" } } }, "is_skipped": false }, "Device=0 BlockSize=2^10 NumBlocks=2^6": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "1024" }, "NumBlocks": { "type": "int64", "value": "64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "448" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010965164419642858" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.012922365203073934" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010910109984023236" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.013003332317485732" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "61510712631.013084" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "492085701048.1047" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.565355814623282" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010869085366833847" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "486" } } }, "is_skipped": false }, "Device=0 BlockSize=2^6 NumBlocks=2^8": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "64" }, "NumBlocks": { "type": "int64", "value": "256" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "229" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.002169319052401745" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.003922215380349919" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0021638464469576494" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.003914908768819146" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "31013690502.09386" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "248109524016.7509" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.2850523024089509" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0021620516050990224" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "243" } } }, "is_skipped": false }, "Device=0 BlockSize=2^8 NumBlocks=2^8": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "256" }, "NumBlocks": { "type": "int64", "value": "256" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "456" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.001076449870614035" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.011436755828819038" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010709658272956548" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.01147415502832018" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "62662003109.342606" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "501296024874.74084" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5759375285785165" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010696090290923384" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "487" } } }, "is_skipped": false }, "Device=0 BlockSize=2^10 NumBlocks=2^8": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "1024" }, "NumBlocks": { "type": "int64", "value": "256" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "500" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0009796881099999996" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.006630669601394768" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009742486392259615" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.006702329573169841" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "68882686922.01393" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "551061495376.1115" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6331129312685104" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009725467921183118" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "542" } } }, "is_skipped": false }, "Device=0 BlockSize=2^6 NumBlocks=2^10": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "64" }, "NumBlocks": { "type": "int64", "value": "1024" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "459" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010702333769063192" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.009289316979889557" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010647220575212134" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.009351871253339476" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "63029467198.450455" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "504235737587.60364" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5793149558681108" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010658674782853784" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "492" } } }, "is_skipped": false }, "Device=0 BlockSize=2^8 NumBlocks=2^10": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "256" }, "NumBlocks": { "type": "int64", "value": "1024" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "500" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.000979172994" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007443039949868979" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009736743034124385" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.007443203952714375" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "68923318367.14127" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "551386546937.1301" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6334863820509308" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009717381278159657" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "541" } } }, "is_skipped": false }, "Device=0 BlockSize=2^10 NumBlocks=2^10": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "1024" }, "NumBlocks": { "type": "int64", "value": "1024" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "474" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010353058628691984" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.02225997387052118" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010297924077712025" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.022347468768318675" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "65167371106.61446" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "521338968852.9157" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5989648079652065" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010282588096414032" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "522" } } }, "is_skipped": false }, "Device=1 BlockSize=2^6 NumBlocks=2^6": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "64" }, "NumBlocks": { "type": "int64", "value": "64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "76" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0066473009473684225" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0010215778080601146" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.006642639580525849" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.001033363366599417" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "10102740512.482763" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "80821924099.8621" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.11038833601926096" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.006642121520223497" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "79" } } }, "is_skipped": false }, "Device=1 BlockSize=2^8 NumBlocks=2^6": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "256" }, "NumBlocks": { "type": "int64", "value": "64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "216" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0023017428981481495" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0022391000845061416" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.002297048738709203" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.002236143378518806" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "29215254717.542892" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "233722037740.34314" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.3192226258472781" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.002296809949372944" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "228" } } }, "is_skipped": false }, "Device=1 BlockSize=2^10 NumBlocks=2^6": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "1024" }, "NumBlocks": { "type": "int64", "value": "64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "418" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.001179723488038277" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0038851033159110274" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.001175016037870252" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0039137311385035334" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "57113147256.81244" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "456905178054.4995" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6240509971242618" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011733407974243164" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "448" } } }, "is_skipped": false }, "Device=1 BlockSize=2^6 NumBlocks=2^8": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "64" }, "NumBlocks": { "type": "int64", "value": "256" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "224" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.002222525227678572" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0013751517218031045" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.002217807294002601" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0013709949704347455" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "30259105099.65222" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "242072840797.21777" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.33062833369375244" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.002216307048556171" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "237" } } }, "is_skipped": false }, "Device=1 BlockSize=2^8 NumBlocks=2^8": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "256" }, "NumBlocks": { "type": "int64", "value": "256" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "434" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0011343795576036872" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0063161302839560935" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011296571429973376" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.006343846096918854" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "59406399911.69264" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "475251199293.54114" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6491083906434948" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011274932015425106" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "469" } } }, "is_skipped": false }, "Device=1 BlockSize=2^10 NumBlocks=2^8": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "1024" }, "NumBlocks": { "type": "int64", "value": "256" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "437" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0011263629336384434" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.002337926005235178" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011216670730294026" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.002335621873443411" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "59829574758.535194" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "478636598068.28156" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6537322416797989" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011199346014793882" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "470" } } }, "is_skipped": false }, "Device=1 BlockSize=2^6 NumBlocks=2^10": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "64" }, "NumBlocks": { "type": "int64", "value": "1024" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "439" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0011228420820045561" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0030427104293198376" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011181568284784197" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0030409709404223893" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "60017398535.51786" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "480139188284.1429" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6557845119702563" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011164653372257314" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "470" } } }, "is_skipped": false }, "Device=1 BlockSize=2^8 NumBlocks=2^10": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "256" }, "NumBlocks": { "type": "int64", "value": "1024" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "440" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00112084285909091" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0025157975228794673" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011161849425597621" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0025072621927497768" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "60123427078.40005" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "480987416627.2004" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6569430406293711" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011137209100238348" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "472" } } }, "is_skipped": false }, "Device=1 BlockSize=2^10 NumBlocks=2^10": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "BlockSize": { "type": "int64", "value": "1024" }, "NumBlocks": { "type": "int64", "value": "1024" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "464" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010599195581896552" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0019836197107494535" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010552384144273295" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0019838471234599063" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "63595925889.809" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "508767407118.472" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.69488555386592" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00105384634014122" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "499" } } }, "is_skipped": false } } }, { "index": 3, "name": "copy_type_sweep", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0, 1 ], "axes": { "T": { "type": "type", "flags": "", "values": [ { "input_string": "U8", "description": "uint8_t", "is_active": true }, { "input_string": "U16", "description": "uint16_t", "is_active": true }, { "input_string": "U32", "description": "uint32_t", "is_active": true }, { "input_string": "U64", "description": "uint64_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] } }, "states": { "Device=0 T=U8": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U8" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "217" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0022855767235023037" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.00300372701685277" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00228007503588628" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0030435512411696388" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "117730974540.34332" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "235461949080.68665" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.27052154076365653" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0022790989087975544" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "230" } } }, "is_skipped": false }, "Device=0 T=U16": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U16" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "342" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0014443213274853803" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.005313412134692269" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0014388333057102406" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0053179597407149795" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "93282333309.4497" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "373129333237.7988" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.4286871935176917" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.001437488301595052" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "360" } } }, "is_skipped": false }, "Device=0 T=U32": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U32" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "456" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010777073771929832" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.011572026564316875" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010722620346044238" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.011623749514354471" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "62586253951.21598" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "500690031609.72784" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5752413046986763" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.001070086296237245" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "490" } } }, "is_skipped": false }, "Device=0 T=U64": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "514" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0009532965797665363" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0051196185177249205" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009478394400748767" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.005131328754237089" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "35400966219.92148" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "566415459518.7437" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6507530555132625" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009458808417792793" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "555" } } }, "is_skipped": false }, "Device=0 T=F32": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "F32" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "456" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010760199342105259" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.010177745756624636" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010705234403150128" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.010199116668820488" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "62687897782.2779" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "501503182258.2232" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5761755310871131" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010701700846354166" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "489" } } }, "is_skipped": false }, "Device=0 T=F64": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "F64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "514" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0009537802023346293" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.00826653498170841" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009483462104769544" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.008316637345502817" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "35382048907.14371" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "566112782514.2993" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6504053107930828" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0009457701526988636" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "550" } } }, "is_skipped": false }, "Device=1 T=U8": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U8" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "184" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0027052529021739146" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.003601477829562271" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0027005073000555477" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0036073295118906625" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "99401862751.66835" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "198803725503.3367" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.2715304380235696" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0027012594746802137" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "193" } } }, "is_skipped": false }, "Device=1 T=U16": { "device": 1, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U16" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "325" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0015229335907692304" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.00461287264596262" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.001518287454751822" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.004612569935175065" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "88400735697.27223" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "353602942789.0889" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.48295856477967786" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.001517369088409953" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "346" } } }, "is_skipped": false }, "Device=1 T=U32": { "device": 1, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U32" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "435" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0011324326574712646" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.006407601007533829" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011277568740406254" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.006437993538809961" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "59506499623.0584" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "476051996984.4672" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6502021374897116" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011264256719333023" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "469" } } }, "is_skipped": false }, "Device=1 T=U64": { "device": 1, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "U64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "468" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010516201538461542" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.002564258397948022" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010469369577546404" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0025731489764373736" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "32050097908.439487" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "512801566535.0318" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.700395496250863" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010448107472453933" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "502" } } }, "is_skipped": false }, "Device=1 T=F32": { "device": 1, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "F32" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "435" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.001132957098850574" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.006385301490538342" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011282652709675928" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.006406051844181131" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "59479685962.90115" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "475837487703.2092" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6499091560631682" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011265583781452921" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "462" } } }, "is_skipped": false }, "Device=1 T=F64": { "device": 1, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "T": { "type": "string", "value": "F64" } }, "summaries": { "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "468" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0010518258760683764" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.002638851741610878" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010471444782028856" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0026422486584488855" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "32043746300.974895" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "512699940815.5983" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.7002566936401856" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0010447956953898514" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "505" } } }, "is_skipped": false } } }, { "index": 4, "name": "copy_type_conversion_sweep", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "devices": [ 0, 1 ], "axes": { "In": { "type": "type", "flags": "", "values": [ { "input_string": "I8", "description": "int8_t", "is_active": true }, { "input_string": "I16", "description": "int16_t", "is_active": true }, { "input_string": "I32", "description": "int32_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "I64", "description": "int64_t", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] }, "Out": { "type": "type", "flags": "", "values": [ { "input_string": "I8", "description": "int8_t", "is_active": true }, { "input_string": "I16", "description": "int16_t", "is_active": true }, { "input_string": "I32", "description": "int32_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "I64", "description": "int64_t", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] } }, "states": { "Device=0 In=I8 Out=I8": { "device": 0, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=0 In=I8 Out=I16": { "device": 0, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I16" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "775" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.000624855941935483" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0026998034506879763" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006194506773641069" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0027411303205337333" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "108336089461.65392" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "325008268384.96173" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.37340104364081084" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006171294842507731" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "849" } } }, "is_skipped": false }, "Device=0 In=I8 Out=I32": { "device": 0, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "660" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0007373176257575753" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0039374442120074255" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007318062056194664" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.003968882228293087" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "91703054011.67381" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "458515270058.369" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5267868451957365" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007297926682692308" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "715" } } }, "is_skipped": false }, "Device=0 In=I8 Out=F32": { "device": 0, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "F32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "655" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0007424307770992365" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.004224076490124651" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007369820818646267" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.004224007291086274" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "91059017106.91382" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "455295085534.5691" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5230871846674737" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007352958009195734" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "706" } } }, "is_skipped": false }, "Device=0 In=I8 Out=I64": { "device": 0, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "536870912" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "407" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0012081452088452092" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.009488803674295956" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0012026181030624918" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.009557481496348873" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "55802306508.69623" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "502220758578.26605" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5769999524106917" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0012018776918068911" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "429" } } }, "is_skipped": false }, "Device=0 In=I8 Out=F64": { "device": 0, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "536870912" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "415" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0011839576867469879" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007849889234045669" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011785369482385114" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.007894267956792296" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "56942520215.682335" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "512482681941.14105" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5887898459801713" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0011769149367873732" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "444" } } }, "is_skipped": false }, "Device=0 In=I16 Out=I8": { "device": 0, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=I16 Out=I16": { "device": 0, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=0 In=I16 Out=I32": { "device": 0, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1105" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00043164991312217215" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.010807224020750296" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004261522234295282" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.010967121645138193" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "78738136645.08269" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "472428819870.49615" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.542772081652684" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004235287455769328" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1232" } } }, "is_skipped": false }, "Device=0 In=I16 Out=F32": { "device": 0, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "F32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1103" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004325297388939259" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.008132056614414322" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004270972975631891" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.008246119473642231" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "78563906143.74146" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "471383436862.44867" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5415710441893942" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004253676273859287" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1241" } } }, "is_skipped": false }, "Device=0 In=I16 Out=I64": { "device": 0, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "733" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0006613082878581173" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007945982231516395" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006558368572759433" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.008081739278450547" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "51162772612.94874" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "511627726129.48737" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5878075897627383" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006543398455948913" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "794" } } }, "is_skipped": false }, "Device=0 In=I16 Out=F64": { "device": 0, "type_config_index": 11, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "734" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0006602106062670296" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007402870806519008" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006547767629577938" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.007440276584065623" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "51245605980.92404" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "512456059809.2404" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.588759259891131" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006532440560455698" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "814" } } }, "is_skipped": false }, "Device=0 In=I32 Out=I8": { "device": 0, "type_config_index": 12, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=I32 Out=I16": { "device": 0, "type_config_index": 13, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=I32 Out=I32": { "device": 0, "type_config_index": 14, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=0 In=I32 Out=F32": { "device": 0, "type_config_index": 15, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "F32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1735" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0002670209631123916" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.013093461911533781" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002615653075986357" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.013401071612529727" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "64141594900.43743" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "513132759203.49945" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5895367178349029" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00025968648747412565" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "2023" } } }, "is_skipped": false }, "Device=0 In=I32 Out=I64": { "device": 0, "type_config_index": 16, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1233" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00038442103811841063" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.00844365555913354" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.000379033355382238" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.008658982321334757" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "44263165132.474785" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "531157981589.6974" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6102458428190457" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00037769805444141474" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1381" } } }, "is_skipped": false }, "Device=0 In=I32 Out=F64": { "device": 0, "type_config_index": 17, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1233" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0003844277526358471" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.009146640310310188" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00037900421011283686" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.009309598787035932" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "44266568951.84647" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "531198827422.15765" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6102927704758245" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0003778626589998872" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1385" } } }, "is_skipped": false }, "Device=0 In=F32 Out=I8": { "device": 0, "type_config_index": 18, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=F32 Out=I16": { "device": 0, "type_config_index": 19, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=F32 Out=I32": { "device": 0, "type_config_index": 20, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1726" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00026862367844727737" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.013398984730737511" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002631768242198105" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.013647890011152018" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "63748835216.53615" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "509990681732.2892" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5859267942696337" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002611921188044063" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1917" } } }, "is_skipped": false }, "Device=0 In=F32 Out=F32": { "device": 0, "type_config_index": 21, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "F32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=0 In=F32 Out=I64": { "device": 0, "type_config_index": 22, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1235" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0003840306607287451" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.009229816202298303" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00037859300990336473" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.009349966037483663" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "44314648081.54371" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "531775776978.5245" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6109556261242239" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00037728447759194497" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1385" } } }, "is_skipped": false }, "Device=0 In=F32 Out=F64": { "device": 0, "type_config_index": 23, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1233" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0003844510843471213" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.009332888492996015" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00037903595060134466" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.009455810939030309" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "44262862067.25975" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "531154344807.117" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6102416645302355" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00037756963876577526" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1352" } } }, "is_skipped": false }, "Device=0 In=I64 Out=I8": { "device": 0, "type_config_index": 24, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=I64 Out=I16": { "device": 0, "type_config_index": 25, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=I64 Out=I32": { "device": 0, "type_config_index": 26, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=I64 Out=F32": { "device": 0, "type_config_index": 27, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "F32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=I64 Out=I64": { "device": 0, "type_config_index": 28, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I64" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=0 In=I64 Out=F64": { "device": 0, "type_config_index": 29, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "8388608" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1863" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00024704845947396656" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.008865365856503242" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00024161205912269328" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.009044628498958832" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "34719326636.507706" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "555509226184.1233" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.638222916112274" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00023959052166815616" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "2168" } } }, "is_skipped": false }, "Device=0 In=F64 Out=I8": { "device": 0, "type_config_index": 30, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=F64 Out=I16": { "device": 0, "type_config_index": 31, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=F64 Out=I32": { "device": 0, "type_config_index": 32, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=F64 Out=F32": { "device": 0, "type_config_index": 33, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "F32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=0 In=F64 Out=I64": { "device": 0, "type_config_index": 34, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "8388608" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1863" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0002471723081052067" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.008513791485233733" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00024173997316990882" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.008792904583053216" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "34700955286.79488" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "555215284588.7181" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6378852074778472" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002398694754203643" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "2141" } } }, "is_skipped": false }, "Device=0 In=F64 Out=F64": { "device": 0, "type_config_index": 35, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "F64" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=1 In=I8 Out=I8": { "device": 1, "type_config_index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=1 In=I8 Out=I16": { "device": 1, "type_config_index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I16" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "704" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0006917278508522718" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.031528564248938934" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0006868111818859521" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.03156956554686085" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "97710791218.80649" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "293132373656.4195" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.4003665505578282" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.000659236081199501" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "789" } } }, "is_skipped": false }, "Device=1 In=I8 Out=I32": { "device": 1, "type_config_index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "568" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0008635000933098584" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007603813185561577" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0008587616908718169" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0076405577469716105" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "78146084895.6489" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "390730424478.2445" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5336680841322177" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0008576233512476871" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "608" } } }, "is_skipped": false }, "Device=1 In=I8 Out=F32": { "device": 1, "type_config_index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "F32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "568" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0008623903292253519" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007683250202065139" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0008576831001211219" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0077139887360741476" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "78244358540.49461" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "391221792702.473" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.5343392055049074" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0008563666250191483" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "612" } } }, "is_skipped": false }, "Device=1 In=I8 Out=I64": { "device": 1, "type_config_index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "536870912" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "339" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0014575299587020652" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.005325090452654586" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0014528400236878067" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.005339082380112657" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "46191502784.76956" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "415723525062.9261" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.56780420271925" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00145044431581602" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "364" } } }, "is_skipped": false }, "Device=1 In=I8 Out=F64": { "device": 1, "type_config_index": 5, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I8" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "67108864" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "536870912" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "339" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0014595411091445434" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.00536065113916752" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0014548183609250722" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.005369023538480178" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "46128689190.67507" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "415158202716.0756" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.567032073202682" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00145255855984158" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "360" } } }, "is_skipped": false }, "Device=1 In=I16 Out=I8": { "device": 1, "type_config_index": 6, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=I16 Out=I16": { "device": 1, "type_config_index": 7, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=1 In=I16 Out=I32": { "device": 1, "type_config_index": 8, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1042" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00046152892994241876" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007446740614945881" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00045683037259413987" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.007515900606668647" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "73450527839.16064" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "440703167034.96387" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6019219392413733" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004549347768605374" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1156" } } }, "is_skipped": false }, "Device=1 In=I16 Out=F32": { "device": 1, "type_config_index": 9, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "F32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1046" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004599197934990448" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.007608321516935087" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00045521636728916755" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.007662230748478094" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "73710952441.8422" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "442265714651.0532" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.604056100648838" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004532274742649026" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1168" } } }, "is_skipped": false }, "Device=1 In=I16 Out=I64": { "device": 1, "type_config_index": 10, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "648" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0007539361157407405" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.005745552244274178" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007492513590388824" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.005767470540109363" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "44783945461.29704" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "447839454612.97046" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6116688355181524" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007466272232380319" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "705" } } }, "is_skipped": false }, "Device=1 In=I16 Out=F64": { "device": 1, "type_config_index": 11, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I16" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "33554432" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "268435456" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "650" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0007517909569230775" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.0052243182117119895" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007470858345581929" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.005225121011834867" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "44913757493.26477" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "449137574932.64764" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6134418363918374" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0007439345558090966" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "707" } } }, "is_skipped": false }, "Device=1 In=I32 Out=I8": { "device": 1, "type_config_index": 12, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=I32 Out=I16": { "device": 1, "type_config_index": 13, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=I32 Out=I32": { "device": 1, "type_config_index": 14, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=1 In=I32 Out=F32": { "device": 1, "type_config_index": 15, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "F32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1687" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0002777349045643155" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.005691592315009916" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.000273079350458212" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.005675718906016491" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "61437146279.45599" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "491497170235.64795" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6712974899416083" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002715061958589702" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1930" } } }, "is_skipped": false }, "Device=1 In=I32 Out=I64": { "device": 1, "type_config_index": 16, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1133" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004230005507502205" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.004752057993974487" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00041831812217818477" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.004789005696384007" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "40106357125.149025" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "481276285501.78827" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6573375840004757" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00041601362464715726" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1251" } } }, "is_skipped": false }, "Device=1 In=I32 Out=F64": { "device": 1, "type_config_index": 17, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I32" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1132" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004233390768551238" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.004579872590098746" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00041865752666346193" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.004575612011474384" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "40073843013.66299" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "480886116163.9559" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6568046822606478" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004163219633556548" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1260" } } }, "is_skipped": false }, "Device=1 In=F32 Out=I8": { "device": 1, "type_config_index": 18, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=F32 Out=I16": { "device": 1, "type_config_index": 19, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=F32 Out=I32": { "device": 1, "type_config_index": 20, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I32" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1665" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00028157421321321324" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.01259188984622349" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002768973456309726" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0128083650869227" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "60590021048.303505" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "484720168386.42804" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6620413139019177" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002751490314863719" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1917" } } }, "is_skipped": false }, "Device=1 In=F32 Out=F32": { "device": 1, "type_config_index": 21, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "F32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=1 In=F32 Out=I64": { "device": 1, "type_config_index": 22, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1133" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004230867334510152" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.004737837167796919" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004184002545904713" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0047584325554732645" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "40098484204.84705" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "481181810458.1647" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6572085479378342" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0004160796998517786" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1265" } } }, "is_skipped": false }, "Device=1 In=F32 Out=F64": { "device": 1, "type_config_index": 23, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F32" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "16777216" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "134217728" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1132" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0004234168127208481" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.004661976745113187" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00041872537112383403" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.0046895661377312735" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "40067350003.108116" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "480808200037.2974" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6566982627257668" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00041628507170249205" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1259" } } }, "is_skipped": false }, "Device=1 In=I64 Out=I8": { "device": 1, "type_config_index": 24, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=I64 Out=I16": { "device": 1, "type_config_index": 25, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=I64 Out=I32": { "device": 1, "type_config_index": 26, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=I64 Out=F32": { "device": 1, "type_config_index": 27, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "F32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=I64 Out=I64": { "device": 1, "type_config_index": 28, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "I64" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, "Device=1 In=I64 Out=F64": { "device": 1, "type_config_index": 29, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "I64" }, "Out": { "type": "string", "value": "F64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "8388608" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1753" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.0002666010844266969" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.004049936615976281" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00026192253768546044" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.00396300201275967" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "32027056831.87056" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "512432909309.92896" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6998919762209476" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.00026010225147830515" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "1995" } } }, "is_skipped": false }, "Device=1 In=F64 Out=I8": { "device": 1, "type_config_index": 30, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I8" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=F64 Out=I16": { "device": 1, "type_config_index": 31, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I16" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=F64 Out=I32": { "device": 1, "type_config_index": 32, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=F64 Out=F32": { "device": 1, "type_config_index": 33, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "F32" } }, "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, "Device=1 In=F64 Out=I64": { "device": 1, "type_config_index": 34, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "I64" } }, "summaries": { "Element count: Items": { "short_name": { "type": "string", "value": "Items" }, "value": { "type": "int64", "value": "8388608" } }, "Input Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "InSize" }, "value": { "type": "int64", "value": "67108864" } }, "Output Buffer Size: ": { "hint": { "type": "string", "value": "bytes" }, "short_name": { "type": "string", "value": "OutSize" }, "value": { "type": "int64", "value": "67108864" } }, "Number of Samples (Cold)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Samples" }, "description": { "type": "string", "value": "Number of kernel executions in cold time measurements." }, "value": { "type": "int64", "value": "1753" } }, "Average CPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "CPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time observed from host." }, "value": { "type": "float64", "value": "0.00026658457387335985" } }, "CPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold CPU execution time measurements." }, "value": { "type": "float64", "value": "0.004159876144452023" } }, "Average GPU Time (Cold)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "GPU Time" }, "description": { "type": "string", "value": "Average isolated kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002619018185261111" } }, "GPU Relative Standard Deviation (Cold)": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "Noise" }, "description": { "type": "string", "value": "Relative standard deviation of the cold GPU execution time measurements." }, "value": { "type": "float64", "value": "0.004108154173032601" } }, "Element Throughput": { "hint": { "type": "string", "value": "item_rate" }, "short_name": { "type": "string", "value": "Elem/s" }, "description": { "type": "string", "value": "Number of input elements handled per second." }, "value": { "type": "float64", "value": "32029590505.3583" } }, "Average Global Memory Throughput": { "hint": { "type": "string", "value": "byte_rate" }, "short_name": { "type": "string", "value": "GlobalMem BW" }, "description": { "type": "string", "value": "Number of bytes read/written per second to the CUDA device's global memory." }, "value": { "type": "float64", "value": "512473448085.7328" } }, "Percent Peak Global Memory Throughput": { "hint": { "type": "string", "value": "percentage" }, "short_name": { "type": "string", "value": "BWPeak" }, "description": { "type": "string", "value": "Global device memory throughput as a percentage of the device's peak bandwidth." }, "value": { "type": "float64", "value": "0.6999473449597531" } }, "Average GPU Time (Batch)": { "hint": { "type": "string", "value": "duration" }, "short_name": { "type": "string", "value": "Batch GPU" }, "description": { "type": "string", "value": "Average back-to-back kernel execution time as measured by CUDA events." }, "value": { "type": "float64", "value": "0.0002600505164606654" } }, "Number of Samples (Batch)": { "hint": { "type": "string", "value": "sample_size" }, "short_name": { "type": "string", "value": "Batch" }, "description": { "type": "string", "value": "Number of kernel executions in hot time measurements." }, "value": { "type": "int64", "value": "2010" } } }, "is_skipped": false }, "Device=1 In=F64 Out=F64": { "device": 1, "type_config_index": 35, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 0.5, "axis_values": { "In": { "type": "string", "value": "F64" }, "Out": { "type": "string", "value": "F64" } }, "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." } } } ] }