{ "meta": { "argv": [ "bin/nvbench.example.axes", "--json", "/home/av/code/src/nvbench/scripts/test_cmp.json" ], "version": { "json": { "major": 1, "minor": 0, "patch": 0, "string": "1.0.0" }, "nvbench": { "major": 0, "minor": 1, "patch": 0, "string": "0.1.0", "git_branch": "walltime_reports", "git_sha": "348acbd6eb752a87e15c28fe1ad1cb827eaaadec", "git_version": "old-cmake-63-g348acbd", "git_is_dirty": false } } }, "devices": [ { "id": 0, "name": "Quadro GV100", "sm_version": 700, "ptx_version": 700, "sm_default_clock_rate": 1627000000, "number_of_sms": 80, "max_blocks_per_sm": 32, "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, "global_memory_size": 34086060032, "global_memory_bus_peak_clock_rate": 850000000, "global_memory_bus_width": 4096, "global_memory_bus_bandwidth": 870400000000, "l2_cache_size": 6291456, "shared_memory_per_sm": 98304, "shared_memory_per_block": 49152, "ecc_state": false }, { "id": 1, "name": "Quadro GP100", "sm_version": 600, "ptx_version": 600, "sm_default_clock_rate": 1442500000, "number_of_sms": 56, "max_blocks_per_sm": 32, "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, "global_memory_size": 17069309952, "global_memory_bus_peak_clock_rate": 715000000, "global_memory_bus_width": 4096, "global_memory_bus_bandwidth": 732160000000, "l2_cache_size": 4194304, "shared_memory_per_sm": 65536, "shared_memory_per_block": 49152, "ecc_state": false } ], "benchmarks": [ { "name": "simple", "index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": null, "states": [ { "name": "Device=0", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": null, "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001009524801603207" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006144561739025865" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010034006580799991" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0005237510233783218" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.514396598" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001001475909284053" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524788153" } ] } ], "is_skipped": false }, { "name": "Device=1", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": null, "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010077174468937882" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00494341955894122" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010027929121602258" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00033287816568109313" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5123603010000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010014740456151597" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524795703" } ] } ], "is_skipped": false } ] }, { "name": "single_float64_axis", "index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "Duration", "type": "float64", "flags": "", "values": [ { "input_string": "0", "description": "", "value": 0.0 }, { "input_string": "0.0001", "description": "", "value": 0.0001 }, { "input_string": "0.0002", "description": "", "value": 0.0002 }, { "input_string": "0.0003", "description": "", "value": 0.00030000000000000003 }, { "input_string": "0.0004", "description": "", "value": 0.0004 }, { "input_string": "0.0005", "description": "", "value": 0.0005 }, { "input_string": "0.0006", "description": "", "value": 0.0006000000000000001 }, { "input_string": "0.0007", "description": "", "value": 0.0007000000000000001 }, { "input_string": "0.0008", "description": "", "value": 0.0008000000000000001 }, { "input_string": "0.0009", "description": "", "value": 0.0009000000000000002 }, { "input_string": "0.001", "description": "", "value": 0.0010000000000000002 } ] } ], "states": [ { "name": "Device=0 Duration=0", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "127632" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "9.535606282123409e-06" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "1.4448218958078975" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "3.918024581663389e-06" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.14066541529910018" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "11.513563003000002" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "274328" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "1.8226457245237315e-06" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.500101118" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "4853" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010851134411704107" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.053377272961503276" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010302993536069301" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004807683479660842" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.6373502280000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "5088" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010137620362095862" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.51581551" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0002", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2459" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002088847271248475" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.027095357105136896" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020339123081777852" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002426402384835198" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5670174410000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2582" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020172880307174672" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520878249" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0003", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.00030000000000000003" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1652" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003082859001210656" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01842186373388549" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003027270989578126" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0016270299573856555" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.544737606" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1736" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00030105657621462773" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522648918" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0004", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0004" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1241" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00040859692667203864" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.013800282471048258" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004030542842665574" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0012342926945401174" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.533285391" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1304" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00040140879812416123" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5234506480000001" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0005", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0005" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "994" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005090076327967808" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01104211789520747" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005034694101968762" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.000960945456149481" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.526845475" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1044" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005017609577982818" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5238518600000001" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0006", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0006000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "830" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006083229987951809" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009259805546541143" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006027641820620359" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0008125705181484989" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.52231507" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "872" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006010903174724053" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5241642790000001" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0007", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0007000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "712" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007086338553370777" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007928264539185437" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007030805292424196" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0007112507950799924" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.519468829" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "748" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007014426981064088" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.52469385" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0008", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0008000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "623" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008089985730337072" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006971030802740222" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008034196651957732" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0006306208005906063" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5170688250000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "654" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008017951428707951" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5243872230000001" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0009", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0009000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "554" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009083576299638984" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006199510137107782" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009027842496276245" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0005444417680564487" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.514841552" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "582" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009011235712320125" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524466611" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0010000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010087251282565122" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005573661860035435" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010031565917517711" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0004852012011897464" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5138195830000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010014756763254413" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524785882" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "153013" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "7.705666139478051e-06" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "1.5262458153177543" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "3.057407826310601e-06" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.045574170376734044" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "15.000211589000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "369906" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "1.3516989302429717e-06" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.500042922" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "4879" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010713845111703245" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.045460323768744995" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010249834163043719" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0030010311127595573" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.6230727620000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "5081" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010137619922490036" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5151083350000001" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0002", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2465" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020751516592292123" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.023007065837400455" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020286964052951872" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0014997658908938753" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.559679316" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2588" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020172862033755555" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522088477" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0003", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.00030000000000000003" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1655" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003068471528700908" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.015443555151131" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00030220268294890517" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0010498159491600372" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.539562934" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1736" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003010567520071284" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.52264897" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0004", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0004" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1243" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00040717730973451277" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.011630958382375049" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00040252058700697966" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0007711533484593173" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5294895540000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1305" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004014086726981561" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.52385337" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0005", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0005" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "995" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005075514221105535" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009291726931158024" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005029017407690461" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0006019586171273846" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523705419" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1044" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005017608408726951" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5238529980000001" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0006", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0006000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "831" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006068636666666669" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00775159368655319" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000602217434115358" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0005211064062823375" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5198631410000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "873" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006010892503176905" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524771732" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0007", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0007000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "712" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007072028300561799" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006649464561878749" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007025522259848826" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0004304629385174026" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.516796464" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "748" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007014422085195939" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524693347" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0008", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0008000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "623" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008076071910112361" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005864235047342223" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008029232501600935" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.000386286201448909" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.514722272" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "655" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008017945267771947" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5251914990000001" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0009", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0009000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "555" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009069257099099103" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005199849951312571" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009022579880448067" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.000339409683584611" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.513695142" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "582" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00090112220790378" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5244711550000001" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0010000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010072258977955914" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004633193202486146" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010026042473340073" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0003115372302150914" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.511907711" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010014748609703007" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524787242" } ] } ], "is_skipped": false } ] }, { "name": "copy_sweep_grid_shape", "index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "BlockSize", "type": "int64", "flags": "pow2", "values": [ { "input_string": "6", "description": "2^6 = 64", "value": 64 }, { "input_string": "8", "description": "2^8 = 256", "value": 256 }, { "input_string": "10", "description": "2^10 = 1024", "value": 1024 } ] }, { "name": "NumBlocks", "type": "int64", "flags": "pow2", "values": [ { "input_string": "6", "description": "2^6 = 64", "value": 64 }, { "input_string": "8", "description": "2^8 = 256", "value": 256 }, { "input_string": "10", "description": "2^10 = 1024", "value": 1024 } ] } ], "states": [ { "name": "Device=0 BlockSize=2^6 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "78" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00648948455128205" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0015111507522308748" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006484057010748448" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0012531664584969381" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "10349826333.845528" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "82798610670.76422" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.09512708027431552" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5077619640000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "81" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006481402361834491" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5250069540000001" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^8 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "672" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00217197076636905" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0074534188597851336" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002166515097376846" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00701989634431853" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "30975488738.229183" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "247803909905.83347" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.2847011832557829" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.473432187" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "673" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002163565506021122" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.4606610070000001" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^10 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "688" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010916693808139535" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01306842599006877" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001086250233269015" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012078568140597113" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "61780298815.71512" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "494242390525.72095" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.567833628820911" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.765170478" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "689" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010836307621832676" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.748836308" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^6 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "231" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002171097186147186" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00443956157556455" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0021655962921324217" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.003655102168422409" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "30988630819.05223" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "247909046552.41785" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.28482197443981827" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.506240788" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "243" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002161031840760031" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525142297" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^8 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "736" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010725499320652177" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.011413433377036444" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001067108783223058" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010207282915832727" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "62888493708.49215" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "503107949667.9372" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5780192436442293" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.804676228" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "737" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010644761438770877" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.787106834" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^10 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1488" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009658611908602143" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007916411658808452" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009604295065966908" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005543866520742756" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "69873804937.337" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "558990439498.696" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.642222471850524" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.468436431" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1489" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009584573153443874" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.4370937890000002" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^6 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "528" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010655318598484856" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010411330423168705" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010600458776408978" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009061611791593436" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63307509057.38994" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "506460072459.1195" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.581870487659834" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.573568937" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "529" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010571805049431119" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.559696812" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^8 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1032" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009617264147286825" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007568406287684157" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009562815504018629" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0049994946059467" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "70176888774.8577" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "561415110198.8616" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6450081688865598" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.013943156" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1033" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009539899551395297" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.9906108020000001" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^10 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "560" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010253841303571433" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.03170958999602246" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010199987426400187" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.031247624116965786" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "65793085025.09035" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "526344680200.7228" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6047158550100216" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5859060660000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "561" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010093532926046065" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5665410750000001" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^6 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2245" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0066631781487750605" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010029284827333777" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006658390919190473" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009997863006145854" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "10078841091.558964" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "80630728732.47171" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.11012719724168449" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "15.004037418000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2246" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006649344001406553" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "14.956094204000001" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^8 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "218" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002299290371559632" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0030536390935653273" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002294595665887955" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002251444609090054" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "29246487735.359" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "233971901882.872" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.3195638957097793" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5053657460000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "228" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002293837965580455" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523009414" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^10 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "426" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001178968861502347" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005201397517740588" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011742734310212829" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0033150798656458847" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "57149265432.69776" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "457194123461.5821" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6244456450251067" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5103190240000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "450" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011726047092013889" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.527687784" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^6 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "226" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002219887185840708" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0026061881909203283" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002215178051881032" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0015054811845863602" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "30295020277.49602" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "242360162219.96817" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.33102076352159115" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.506002865" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "237" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022142488503757913" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524792129" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^8 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "448" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011318572321428575" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007609392746712896" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011271811462938788" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0063649890038617206" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59536893622.33475" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "476295148978.678" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6505342397545317" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5155772900000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "470" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011258403372257314" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.529160021" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^10 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "447" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011232368366890376" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004611911863103576" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001118592285736562" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0019864118812352185" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59994034337.37313" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "479952274698.98505" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.655529221343675" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.510514425" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "470" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011164527406083776" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.52474862" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^6 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "448" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011216608169642855" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005148879095566737" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011169912165829119" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002975557170232136" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "60080028386.70366" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "480640227093.6293" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6564688416379333" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.510971747" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "470" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011154764378324467" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524288288" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^8 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "447" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011246830559284123" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004922192808378086" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011200362225240248" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002646502354772987" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59916690773.418724" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "479333526187.3498" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6546841212130542" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.511144538" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "469" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011177327820995468" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5242309390000001" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^10 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "474" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010598897257383965" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004913062706223566" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001055195342387831" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0020503329663902545" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63598521813.16255" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "508788174505.3004" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6949139184130524" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.511291385" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "498" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010538602162556477" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524838223" } ] } ], "is_skipped": false } ] }, { "name": "copy_type_sweep", "index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "T", "type": "type", "flags": "", "values": [ { "input_string": "U8", "description": "uint8_t", "is_active": true }, { "input_string": "U16", "description": "uint16_t", "is_active": true }, { "input_string": "U32", "description": "uint32_t", "is_active": true }, { "input_string": "U64", "description": "uint64_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] } ], "states": [ { "name": "Device=0 T=U8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "T", "type": "string", "value": "U8" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2992" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022994002396390365" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.024778400174351137" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002293938610882044" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.02459574709695746" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "117019459338.00893" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "234038918676.01785" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.2688866253171161" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "6.944152369" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2993" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022801307408338873" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "6.853598372" } ] } ], "is_skipped": false }, { "name": "Device=0 T=U16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 1, "axis_values": [ { "name": "T", "type": "string", "value": "U16" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "672" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014471324925595243" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006960808950083016" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001441753045966228" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005894275617037584" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "93093424269.51526" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "372373697078.06104" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.4278190453562282" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.98617708" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "673" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001438309451800399" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.9712327470000001" } ] } ], "is_skipped": false }, { "name": "Device=0 T=U32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 2, "axis_values": [ { "name": "T", "type": "string", "value": "U32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "848" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001072196766509434" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.011171612715506738" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010668103765204251" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009987563873112983" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "62906084789.7697" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "503248678318.1576" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5781809263765597" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.9268544190000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "849" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001064370134974818" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.9073867080000001" } ] } ], "is_skipped": false }, { "name": "Device=0 T=U64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 3, "axis_values": [ { "name": "T", "type": "string", "value": "U64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1568" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009390030325255086" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008249904375540816" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009335942644701952" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005885112868111755" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35941129114.62859" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "575058065834.0575" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6606825204894962" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.505405182" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1569" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009319373003763345" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.4727063310000001" } ] } ], "is_skipped": false }, { "name": "Device=0 T=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 4, "axis_values": [ { "name": "T", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "752" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010726744441489362" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01103458165791857" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010672343821918702" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009783271621840188" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "62881092588.27738" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "503048740706.21906" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5779512186422553" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.822184149" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "753" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001063365562503555" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.803427655" } ] } ], "is_skipped": false }, { "name": "Device=0 T=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 5, "axis_values": [ { "name": "T", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "544" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009393335257352945" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007706533640467741" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000933937587282237" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005096920917422438" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35927916872.52203" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "574846669960.3525" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6604396483919491" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5221495780000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "565" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009305206028761061" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525756552" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "T", "type": "string", "value": "U8" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2784" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0027056495269396513" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009323042699490573" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0027009093115727078" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009134230706566165" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "99387067477.54266" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "198774134955.08533" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.2714900226112944" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "7.588833747000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2785" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002695659536947251" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "7.532905504" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 1, "axis_values": [ { "name": "T", "type": "string", "value": "U16" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "330" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0015226199969696965" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005622755807814305" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0015179373560529775" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004685161525974869" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "88421124537.70831" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "353684498150.83325" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.4830699548607316" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.508664443" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "349" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0015155031045733347" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528926318" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 2, "axis_values": [ { "name": "T", "type": "string", "value": "U32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "528" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011321445473484848" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007683380682909642" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011274604856064824" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006460085786164455" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59522142777.2707" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "476177142218.1656" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6503730635628354" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.607756878" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "529" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011264972348745013" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5965697010000001" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 3, "axis_values": [ { "name": "T", "type": "string", "value": "U64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "478" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010525728723849374" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005381193387193611" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00104785640469156" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0028679450954259256" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32021975386.863106" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512351606189.8097" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6997809306569734" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.512086032" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "500" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010454827880859374" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5227592090000001" } ] } ], "is_skipped": false }, { "name": "Device=1 T=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 4, "axis_values": [ { "name": "T", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "528" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011322722803030294" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0076927816018557355" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011275246077866272" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006425647618712464" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59518757760.62857" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "476150062085.02856" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.650336076929945" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.608051446" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "529" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011258555051284391" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.596245555" } ] } ], "is_skipped": false }, { "name": "Device=1 T=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 5, "axis_values": [ { "name": "T", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "478" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010523593117154819" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005286902872056256" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001047618542256216" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002706934586546566" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32029245996.099976" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512467935937.5996" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6999398163483386" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.51215675" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "503" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010453338319691226" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525822574" } ] } ], "is_skipped": false } ] }, { "name": "copy_type_conversion_sweep", "index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "In", "type": "type", "flags": "", "values": [ { "input_string": "I8", "description": "int8_t", "is_active": true }, { "input_string": "I16", "description": "int16_t", "is_active": true }, { "input_string": "I32", "description": "int32_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "I64", "description": "int64_t", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] }, { "name": "Out", "type": "type", "flags": "", "values": [ { "input_string": "I8", "description": "int8_t", "is_active": true }, { "input_string": "I16", "description": "int16_t", "is_active": true }, { "input_string": "I32", "description": "int32_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "I64", "description": "int64_t", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] } ], "states": [ { "name": "Device=0 In=I8 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I8 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 1, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1008" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006586167946428575" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.05914716011832632" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006528769518056576" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.057982657554439924" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "102789451847.54562" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "308368355542.63684" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.35428349671718384" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.6850768" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1009" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006166892571539062" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.623953807" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 2, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "684" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007371795058479537" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008847285785468822" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000731761917384746" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004879904384809398" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "91708604131.57506" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "458543020657.87524" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5268187277778897" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5183809100000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "719" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000729479623935153" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5245085300000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 3, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "680" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007416148632352943" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00847915084559806" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000736235386308502" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0043064135466205815" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "91151369858.06279" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "455756849290.3139" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5236177036883202" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5183531140000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "718" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007336860167946988" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.526798324" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 4, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "528" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0012050906723484857" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009907122479821073" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011996847262436706" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008827766250664237" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "55938750016.53507" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "503448750148.8156" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5784107883143562" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.647279072" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "529" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011969163755838723" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.634178896" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 5, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1040" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011804124500000013" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00957111143535521" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011749697549985022" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00836195198403357" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "57115396983.206215" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "514038572848.85596" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5905774044678952" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.249563157" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1041" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011735446663800626" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.2279065690000002" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 6, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I16 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 7, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I16 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 8, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1632" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00043062034803921626" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01672483523731806" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00042520409690983404" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010836224516018789" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78913708131.82764" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "473482248790.9659" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5439823630410913" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.737060063" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1633" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004232788786191731" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.695498727" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 9, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1184" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00043284459121621524" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.015105337896417907" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004274506211733894" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008262516586090977" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78498966518.9634" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "470993799113.7804" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5411233905259426" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5372377770000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1232" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004249965618183087" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523606656" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 10, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "768" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006572663450520837" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.011014295443548292" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006518266665128367" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007223480904816997" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "51477538007.934814" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "514775380079.34814" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5914239201279275" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520738605" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "796" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006487323243414338" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.516403752" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 11, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "880" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006565674102272736" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01097889608017816" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006511251280253577" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007150441437138621" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "51533001194.03968" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "515330011940.39685" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5920611350418162" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.596114039" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "881" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006487893111724723" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.572324006" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 12, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 13, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 14, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 15, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1904" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026858391123949583" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.024532996731547897" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002631697807648852" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0133776106644627" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63750541385.25386" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "510004331082.0309" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5859424759674068" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.551536212" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1969" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00025964095085147915" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5112442500000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 16, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00038312173493975965" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0162544382499927" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00037769761349422534" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00766160749599669" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44419703489.221306" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "533036441870.65564" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6124040003109554" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.536583359" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1388" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003753291852879593" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520968135" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 17, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00038320030346385516" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.016795599989722854" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00037778021639819085" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008713885990809477" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44409990972.94271" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "532919891675.31256" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6122700961343205" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.536768873" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1377" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003755766647660222" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.517180861" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 18, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 19, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 20, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1904" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026906845745798324" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.023966161873692115" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026363512487033393" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012223432341603665" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63638014882.31772" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "509104119058.54175" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5849082250213026" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.552411471" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1961" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002602678033419253" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5103970170000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 21, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=F32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 22, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00038308867695783106" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.016918671591625058" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003776144570480286" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008723758091138187" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44429485383.46378" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "533153824601.56537" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6125388609852543" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5366656240000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1396" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003754830346749642" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5241867800000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 23, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00038299202560240965" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0167760658438423" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003775633729949433" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008684523141297206" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44435496660.91339" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "533225959930.9607" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6126217370530339" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5364044290000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1404" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00037541050924534816" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5270894850000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 24, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 25, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 26, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 27, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 28, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 29, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2112" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002423827249053035" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.02416424179820878" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00023696183533210337" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007872204592971034" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35400671117.538055" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "566410737880.6089" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6507476308370966" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5565369530000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2205" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00023414492098922904" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5163003700000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 30, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 31, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 32, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 33, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 34, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2112" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00024286170075757575" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.02414779678250403" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00023745110798909405" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008091753071026355" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35327727341.60197" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "565243637465.6315" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6494067526029773" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5576062110000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2233" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00023462851593113247" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523938348" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 35, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I8 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I8 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 1, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1024" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000683441244140624" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.03316062878230732" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006786162495845936" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.03212477441508221" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "98890741329.99298" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "296672223989.97894" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.4052013548814179" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.719493135" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1025" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000659710607877592" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.678423381" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 2, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "592" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008640211064189187" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00944028164858259" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008593624308705327" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007736902908752538" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78091456630.25882" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "390457283151.29407" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5332950217866232" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5227107280000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "614" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008578363971523819" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5267261910000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 3, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "592" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008612816165540544" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00975919715067052" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000856635513035832" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008096027798987914" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78340044253.09521" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "391700221265.4761" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5349926536077853" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.521050054" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "599" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000855541095510747" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5124821390000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 4, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "672" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014581301889880955" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006071682334960142" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014534626205762236" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005154971568436681" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "46171716458.311646" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "415545448124.8048" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5675609813767548" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.9926510430000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "673" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001450536531289656" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.9791953990000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 5, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "352" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014604223210227273" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006352174295896549" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014556942754848428" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005460161744719934" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "46100932819.597916" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "414908395376.3812" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5666908809227235" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520818273" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "361" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014522860624783588" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524288878" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 6, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I16 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 7, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I16 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 8, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1104" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00046094446557971044" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012758359369013577" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004563006377252548" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007641740734756292" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "73535799045.2856" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "441214794271.71356" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6026207308125459" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.529887408" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1140" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000454689802203262" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.518361091" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 9, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1104" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004598212318840582" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012784453789403875" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004551693620025247" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007660841909416756" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "73718564563.25784" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "442311387379.54706" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6041184814515229" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5287007920000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1154" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004530425443599707" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5228252680000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 10, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "672" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007534447321428569" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008541470607558692" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007488120960160388" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005872606532245015" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44810216312.63993" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "448102163126.3993" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6120276485008732" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5189650410000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "701" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007457322407721113" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522771435" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 11, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "672" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007513076056547618" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008183588591017211" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007466521440517336" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005268370256387482" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44939845505.452805" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "449398455054.528" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6137981521177448" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.517594289" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "705" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007440871218417554" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524597968" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 12, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 13, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 14, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 15, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1840" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002776829885869563" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.017861418449176162" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002730688870924969" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005670388220307151" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "61439500408.250595" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "491516003266.00476" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6713232125027382" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.546509219" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1927" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002714873839983621" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523169341" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 16, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1196" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004229804180602015" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012054556683918517" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004183483349290177" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004710258623264892" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40103460679.117165" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481241528149.406" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6572901116551109" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528636114" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1252" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004159514698357628" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520784637" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 17, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1195" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004231393305439326" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012074190328924192" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004185113171653266" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004772051537598408" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40087843056.75637" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481054116681.0764" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6570341410089002" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528483731" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1258" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00041619300539050074" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523585816" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 18, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 19, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 20, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1808" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00028167357632743345" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.021202385815789038" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00027701768152151984" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012884867104306086" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "60563700872.27331" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "484509606978.18646" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6617537245659234" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5443872160000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1858" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002751795970970129" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5112970170000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 21, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=F32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 22, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1195" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00042308463263598364" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012089671437059933" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004184455500237615" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004773399327485907" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40094143668.26772" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481129724019.21265" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.657137407150367" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528290021" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1264" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004161183321023289" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525988793" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 23, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1195" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004230846794979085" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012109978274913669" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004184373557567601" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004746400070144573" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40094928832.67498" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481139145992.0998" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6571502758851887" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528360885" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1255" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00041616955031436757" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522306598" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 24, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 25, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 26, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 27, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 28, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 29, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1909" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002665689759036145" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0182363750388233" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026191822334444936" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.003986137271503454" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32027584384.489807" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512441350151.8369" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6999035049058088" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.545965233" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2012" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002600545409185512" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.52324384" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 30, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 31, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 32, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 33, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 34, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1909" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026655877475117843" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01817264133840171" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002619331087848795" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004144721519339008" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32025764283.542324" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512412228536.6772" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6998637299725158" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.546077204" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2003" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002600450215789597" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520883479" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 35, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." } ] } ] }