{ "meta": { "argv": [ "bin/nvbench.example.axes", "--json", "/home/av/code/src/nvbench/scripts/test_ref.json" ], "version": { "json": { "major": 1, "minor": 0, "patch": 0, "string": "1.0.0" }, "nvbench": { "major": 0, "minor": 1, "patch": 0, "string": "0.1.0", "git_branch": "walltime_reports", "git_sha": "348acbd6eb752a87e15c28fe1ad1cb827eaaadec", "git_version": "old-cmake-63-g348acbd", "git_is_dirty": false } } }, "devices": [ { "id": 0, "name": "Quadro GV100", "sm_version": 700, "ptx_version": 700, "sm_default_clock_rate": 1627000000, "number_of_sms": 80, "max_blocks_per_sm": 32, "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, "global_memory_size": 34086060032, "global_memory_bus_peak_clock_rate": 850000000, "global_memory_bus_width": 4096, "global_memory_bus_bandwidth": 870400000000, "l2_cache_size": 6291456, "shared_memory_per_sm": 98304, "shared_memory_per_block": 49152, "ecc_state": false }, { "id": 1, "name": "Quadro GP100", "sm_version": 600, "ptx_version": 600, "sm_default_clock_rate": 1442500000, "number_of_sms": 56, "max_blocks_per_sm": 32, "max_threads_per_sm": 2048, "max_threads_per_block": 1024, "registers_per_sm": 65536, "registers_per_block": 65536, "global_memory_size": 17069309952, "global_memory_bus_peak_clock_rate": 715000000, "global_memory_bus_width": 4096, "global_memory_bus_bandwidth": 732160000000, "l2_cache_size": 4194304, "shared_memory_per_sm": 65536, "shared_memory_per_block": 49152, "ecc_state": false } ], "benchmarks": [ { "name": "simple", "index": 0, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": null, "states": [ { "name": "Device=0", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": null, "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010094458717434867" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005997663682735138" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010034715849794225" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0005782350585973689" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.51435071" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001001475909284053" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524782268" } ] } ], "is_skipped": false }, { "name": "Device=1", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": null, "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010075622164328662" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004836642334083953" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010027443022431728" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00034308545348455907" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.512193993" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010014738126565483" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5247834060000001" } ] } ], "is_skipped": false } ] }, { "name": "single_float64_axis", "index": 1, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "Duration", "type": "float64", "flags": "", "values": [ { "input_string": "0", "description": "", "value": 0.0 }, { "input_string": "0.0001", "description": "", "value": 0.0001 }, { "input_string": "0.0002", "description": "", "value": 0.0002 }, { "input_string": "0.0003", "description": "", "value": 0.00030000000000000003 }, { "input_string": "0.0004", "description": "", "value": 0.0004 }, { "input_string": "0.0005", "description": "", "value": 0.0005 }, { "input_string": "0.0006", "description": "", "value": 0.0006000000000000001 }, { "input_string": "0.0007", "description": "", "value": 0.0007000000000000001 }, { "input_string": "0.0008", "description": "", "value": 0.0008000000000000001 }, { "input_string": "0.0009", "description": "", "value": 0.0009000000000000002 }, { "input_string": "0.001", "description": "", "value": 0.0010000000000000002 } ] } ], "states": [ { "name": "Device=0 Duration=0", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "127488" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "9.540251349146535e-06" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "1.4435508787705211" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "3.9224058844425625e-06" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.14064817853323436" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "11.490547931" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "274905" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "1.8188127571551e-06" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.500083096" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "4853" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010853461796826674" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.05359830602702947" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010302987600936478" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00484111901842999" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.637141422" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "5092" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0001013762061275653" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.51621627" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0002", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2459" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020891169174461132" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.02717422799526722" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020340182381027155" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002406936807045068" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5674029660000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2582" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020172880307174672" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520873229" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0003", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.00030000000000000003" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1652" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003081868111380144" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.018240770684480382" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00030268341853095175" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0016523707958282026" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5447823540000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1736" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00030105657621462773" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5226434630000001" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0004", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0004" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1241" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00040852977276389983" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.013603343023457075" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00040306361880540617" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0012210042127847492" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.53335829" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1304" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00040140879812416123" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5234471270000001" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0005", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0005" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "994" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005089016619718308" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010853962612041912" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000503456178265558" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0009750606561696034" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5268077360000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1044" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005017619516657686" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523849472" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0006", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0006000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "830" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006082555698795184" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009191209785025295" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000602735921345563" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0008234812151490051" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522369137" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "872" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006010903174724053" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524159087" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0007", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0007000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "712" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000708571620786517" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007823433090894212" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007030903266721907" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0007055254847806133" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5193877680000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "748" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007014426981064088" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524686893" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0008", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0008000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "623" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008089194157303374" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006828496858360085" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008034522826177895" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.000611164680542835" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.516959448" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "654" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008017951428707951" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524383518" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.0009", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0009000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "554" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009082872328519855" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006126265423787953" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009027800905360124" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00054941989913754" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.514815663" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "582" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009011235712320125" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524463788" } ] } ], "is_skipped": false }, { "name": "Device=0 Duration=0.001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0010000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010086229759519048" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005485055388542774" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010031437666000492" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0004923631784045008" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.513707802" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001001475909284053" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5247822560000001" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "153037" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "7.764162771094724e-06" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "1.5441551718680286" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "3.05725036652246e-06" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0422080578285922" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "15.000158798000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "369923" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "1.3516386844065532e-06" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5000379110000001" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "4880" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010714987602459019" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.04579843914769717" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010247656405124585" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.003070733813086406" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.623587079" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "5111" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00010137617021268466" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5181448860000001" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0002", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2466" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002074915798864561" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.023038028738255983" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00020283785226716245" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0015459112691612667" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5598459640000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2588" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002017284788341021" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522084334" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0003", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.00030000000000000003" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1655" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00030685509425981873" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.015537955668238047" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00030217996281079384" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0010230869145796749" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.539645641" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1737" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003010563825696243" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522946315" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0004", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0004" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1243" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004072019324215605" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.011695107558689763" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004025163378863194" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0007675334678184685" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.529549118" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1304" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004014085172875527" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523448241" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0005", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0005" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "995" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005075862180904529" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009382010906172408" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005028912236343076" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.000614146973517185" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523755894" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1044" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0005017611916494552" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523849106" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0006", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0006000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "831" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000606902394705175" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007845243707907233" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006022033425301283" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0005063896891280906" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.519957368" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "872" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006010888475890554" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524160638" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0007", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0007000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "712" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007072048665730335" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0066432216884127464" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000702561125326692" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.000444186835693086" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.516785433" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "747" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007014415899274179" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523988754" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0008", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0008000000000000001" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "623" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008075409711075438" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005835313968640737" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008028804110677048" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00038458153244696385" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.514684533" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "654" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008017937429818903" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524384532" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.0009", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0009000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "555" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009069636108108111" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00524675883682327" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009022562016237966" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00035453453918318805" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.513753633" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "583" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009011228374919596" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525365383" } ] } ], "is_skipped": false }, { "name": "Device=1 Duration=0.001", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "Duration", "type": "float64", "value": "0.0010000000000000002" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010072655711422854" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004687180507983469" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010025901990328623" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0003187607548154873" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.511897986" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "524" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010014750939289121" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524783733" } ] } ], "is_skipped": false } ] }, { "name": "copy_sweep_grid_shape", "index": 2, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "BlockSize", "type": "int64", "flags": "pow2", "values": [ { "input_string": "6", "description": "2^6 = 64", "value": 64 }, { "input_string": "8", "description": "2^8 = 256", "value": 256 }, { "input_string": "10", "description": "2^10 = 1024", "value": 1024 } ] }, { "name": "NumBlocks", "type": "int64", "flags": "pow2", "values": [ { "input_string": "6", "description": "2^6 = 64", "value": 64 }, { "input_string": "8", "description": "2^8 = 256", "value": 256 }, { "input_string": "10", "description": "2^10 = 1024", "value": 1024 } ] } ], "states": [ { "name": "Device=0 BlockSize=2^6 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "78" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006491010679487182" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0014400823428293225" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006485689823444072" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.001177496193520018" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "10347220700.783287" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "82777765606.2663" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.09510313144102286" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.507885141" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "81" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006479606722608024" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524857216" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^8 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "656" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002171159740853659" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008002387413205372" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0021657661977337647" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007621859662189712" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "30986199743.177273" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "247889597945.41818" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.2847996299924382" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.437881264" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "657" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0021637841704410677" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.425925528" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^10 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "752" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010918482712765959" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012885667055169438" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010864888095158216" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.011885928967750255" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "61766732811.45538" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "494133862491.64307" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5677089412817591" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.8365534670000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "753" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010831134183156693" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.8183730920000001" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^6 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "231" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002170435731601731" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004358290289328953" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0021650726464919703" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.003568770929248165" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "30996125746.05075" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "247969005968.406" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.28489086163649585" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.50610783" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "243" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0021624730742026746" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5254883690000001" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^8 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "848" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001072975840801887" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010997227168594192" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001067600981103923" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009773228014049224" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "62859500120.174065" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "502876000961.3925" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5777527584574822" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.9275834220000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "849" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010646783151390692" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.907651745" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^10 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1456" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009655372026098907" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007865605432218092" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009601815831693899" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005554818166025829" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "69891846684.33807" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "559134773474.7046" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6423882967310485" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.4363716210000002" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1457" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009584971580017669" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.4059789770000002" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^6 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "976" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010651546700819676" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010462843606792348" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010597991125016906" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009173166661501059" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63322249668.22941" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "506577997345.83527" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5820059712153438" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.059860389" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "977" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010574653456130558" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.038207793" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^8 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1231" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009616476466287569" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007525336475207418" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009562607302014941" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004999666932127862" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "70178416702.1681" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "561427333617.3448" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6450222123361039" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.209430215" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1232" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009541545041486059" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.182675849" } ] } ], "is_skipped": false }, { "name": "Device=0 BlockSize=2^10 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "496" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001023795669354839" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.03114530461728092" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010184043220454653" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0306728390106973" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "65896091117.53555" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "527168728940.2844" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6056626021832312" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.518169856" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "542" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010072569899893336" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.54594076" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^6 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2244" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0066659496501782385" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012044246591117944" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0066612275798478" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012013908599240357" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "10074549052.04325" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "80596392416.346" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.11008029995676627" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "15.003487063000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2245" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.006649818384514629" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "14.950437548000002" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^8 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "218" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022997498486238524" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0031075885812940247" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022950336933135985" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002319295976145478" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "29240905785.18147" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "233927246281.45175" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.319502904121301" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.505458689" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "228" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022943040278919956" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5231123440000001" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^10 NumBlocks=2^6", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "426" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011787892863849767" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0053738632436882575" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011741032116289985" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.003574335492607712" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "57157550831.40471" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "457260406651.2377" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6245361760424466" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.510194696" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "448" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011726912089756558" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525375562" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^6 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "226" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002220062486725664" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0026283185437807914" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002215349671060005" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0015099121235202378" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "30292673376.42893" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "242341387011.43143" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.33099511993475667" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5059807590000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "237" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002214404399887922" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524823987" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^8 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "544" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001132157450367647" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007421825838534079" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011274838826673863" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006150104794311432" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59520907599.348335" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "476167260794.7867" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6503595673005719" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.626143416" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "545" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011260322986392797" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.6144447430000001" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^10 NumBlocks=2^8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "256" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "447" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011234373914988803" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0046157037787769705" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011187847153985792" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0019678845031191957" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59983715433.66298" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "479869723469.30383" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6554164710846042" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5105567950000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "471" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001116505875962049" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5258857730000001" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^6 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "64" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "448" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011217261607142856" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0051297852216839" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011170590700847755" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002961082814438008" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "60076378946.466095" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "480611031571.72876" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6564289657612117" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.510912252" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "471" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011151960174495754" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525268305" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^8 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "256" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "447" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011251578970917226" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005034162791339124" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011205025481964396" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0028024348992886213" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59891754916.5938" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "479134039332.7504" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6544116577425022" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.511320454" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "469" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011173903865854878" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524067611" } ] } ], "is_skipped": false }, { "name": "Device=1 BlockSize=2^10 NumBlocks=2^10", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "BlockSize", "type": "int64", "value": "1024" }, { "name": "NumBlocks", "type": "int64", "value": "1024" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "474" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010600141455696206" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004975251378988354" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001055306057638257" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002145050602183929" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63591849505.90315" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "508734796047.2252" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6948410129578578" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.51139696" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "499" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001053948106173284" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5259308100000001" } ] } ], "is_skipped": false } ] }, { "name": "copy_type_sweep", "index": 3, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "T", "type": "type", "flags": "", "values": [ { "input_string": "U8", "description": "uint8_t", "is_active": true }, { "input_string": "U16", "description": "uint16_t", "is_active": true }, { "input_string": "U32", "description": "uint32_t", "is_active": true }, { "input_string": "U64", "description": "uint64_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] } ], "states": [ { "name": "Device=0 T=U8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "T", "type": "string", "value": "U8" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "3008" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022984299517952063" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.024386082027668385" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002292998504448446" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.02420260340376021" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "117067436144.95683" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "234134872289.91367" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.2689968661419045" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "6.978260496000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "3009" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0022799289537926114" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "6.889595338" } ] } ], "is_skipped": false }, { "name": "Device=0 T=U16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 1, "axis_values": [ { "name": "T", "type": "string", "value": "U16" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "352" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001447471568181817" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006559305677944603" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014420469982380224" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0053741666978658" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "93074447756.55377" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "372297791026.2151" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.4277318371165155" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.51661557" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "364" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014379604465358862" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5234251630000001" } ] } ], "is_skipped": false }, { "name": "Device=0 T=U32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 2, "axis_values": [ { "name": "T", "type": "string", "value": "U32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "960" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010732170854166677" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010747135588535793" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010678389670948187" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009483040603054523" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "62845490816.44543" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "502763926531.5634" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5776239964746822" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.050184244" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "961" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010641954020828663" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.027618288" } ] } ], "is_skipped": false }, { "name": "Device=0 T=U64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 3, "axis_values": [ { "name": "T", "type": "string", "value": "U64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1232" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009393459350649342" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008173389439931525" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009339579984352195" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005779340051383488" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35927131687.09736" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "574834106993.5577" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6604252148363485" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.1830953560000002" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1233" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009315095380951709" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.155529067" } ] } ], "is_skipped": false }, { "name": "Device=0 T=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 4, "axis_values": [ { "name": "T", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "496" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001073113616935484" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01045196550212673" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010676812894882687" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009124433511281603" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "62854771981.7819" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "502838175854.2552" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5777093013031425" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.542738971" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "497" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010637711835818988" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528728784" } ] } ], "is_skipped": false }, { "name": "Device=0 T=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 5, "axis_values": [ { "name": "T", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1232" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009395226306818184" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008519805595594534" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009341417393804389" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006275440558350422" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35920065002.399605" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "574721040038.3937" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6602953125441103" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.183265443" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1233" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0009317267593676144" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.155949535" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "T", "type": "string", "value": "U8" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2640" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002704848576515149" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008230990844947604" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002700116645206098" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008022518759096101" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "99416244285.81326" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "198832488571.62653" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.2715697232457749" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "7.193921508000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2641" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.002695621145895508" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "7.143042223" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 1, "axis_values": [ { "name": "T", "type": "string", "value": "U16" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "330" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0015221530787878797" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0057375488336673195" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0015174821813901261" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004846658675573777" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "88447646796.77927" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "353790587187.11707" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.48321485356632027" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.508487203" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "347" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001516773410764139" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5263325360000001" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 2, "axis_values": [ { "name": "T", "type": "string", "value": "U32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "704" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011323334801136371" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007608036417757464" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001127679999409751" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0063837058695073836" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59510556217.30105" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "476084449738.4084" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6502464621645656" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.810527479" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "705" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011263038268326028" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.796312915" } ] } ], "is_skipped": false }, { "name": "Device=1 T=U64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 3, "axis_values": [ { "name": "T", "type": "string", "value": "U64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "478" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010527462217573217" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005281184054309557" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010480330030289645" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.002728222354093458" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32016579538.070763" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512265272609.1322" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.699663014380917" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.51213991" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "500" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00104523095703125" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522627688" } ] } ], "is_skipped": false }, { "name": "Device=1 T=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 4, "axis_values": [ { "name": "T", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "464" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011329571594827575" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00779514172858376" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011281946900075874" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006520518090102098" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "59483407070.05869" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "475867256560.46954" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6499498150137532" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.534519304" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "467" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011258338421774624" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5257752330000001" } ] } ], "is_skipped": false }, { "name": "Device=1 T=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 5, "axis_values": [ { "name": "T", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "478" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010524297447698746" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0053011542236172425" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010477156826142983" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0027421021970940066" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32026276361.802433" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512420421788.8389" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6998749204939343" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.511908565" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "501" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0010452877204575224" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5236998340000001" } ] } ], "is_skipped": false } ] }, { "name": "copy_type_conversion_sweep", "index": 4, "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "devices": [ 0, 1 ], "axes": [ { "name": "In", "type": "type", "flags": "", "values": [ { "input_string": "I8", "description": "int8_t", "is_active": true }, { "input_string": "I16", "description": "int16_t", "is_active": true }, { "input_string": "I32", "description": "int32_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "I64", "description": "int64_t", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] }, { "name": "Out", "type": "type", "flags": "", "values": [ { "input_string": "I8", "description": "int8_t", "is_active": true }, { "input_string": "I16", "description": "int16_t", "is_active": true }, { "input_string": "I32", "description": "int32_t", "is_active": true }, { "input_string": "F32", "description": "float", "is_active": true }, { "input_string": "I64", "description": "int64_t", "is_active": true }, { "input_string": "F64", "description": "double", "is_active": true } ] } ], "states": [ { "name": "Device=0 In=I8 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 0, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I8 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 1, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "992" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006600980292338716" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.06265755233269708" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006543757735841723" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.061480066936899634" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "102554016681.31558" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "307662050043.9468" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.35347202440710795" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.6754950980000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "993" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006166902596256644" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.613870906" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 2, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "684" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007370927309941522" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008729576791697422" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007317126547558279" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004720480993831976" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "91714778422.67767" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "458573892113.3883" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5268541959023303" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5183597600000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "723" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007290440898383471" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5271070320000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 3, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "680" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007416632955882347" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00842788883853582" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007362919512917023" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004273389706237406" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "91144367234.04161" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "455721836170.20807" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5235774772176104" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.518484646" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "712" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007338700883843925" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522523862" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 4, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "528" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0012047073446969693" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.009645329133519535" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011993323018153505" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0085331592060071" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "55955187647.6784" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "503596688829.1056" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5785807546290276" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.647215045" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "529" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001196625677083526" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.6339911500000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I8 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 5, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1200" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001180585506666666" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010080936476778664" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.001175182694693406" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008979393908657816" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "57105047838.97287" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "513945430550.75586" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5904703935555559" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.4422983710000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1201" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0011731425545594744" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "1.4168893610000002" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 6, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I16 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 7, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I16 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 8, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1696" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00043055614622641435" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01653471806668262" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00042517247028157185" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010615851149343741" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78919578160.31331" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "473517468961.8799" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5440228273918657" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.766234603" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1697" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004232069438320117" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.7227836270000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 9, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1184" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00043288785641891876" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01493008601829662" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004275269453740999" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008146558252326382" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78484952499.63434" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "470909714997.8061" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5410267865324059" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.537378945" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1238" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000424921975574894" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.526063107" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 10, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "768" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006571356510416664" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010841823646108464" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006517510409466911" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007001797627972981" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "51483511175.15826" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "514835111751.5826" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5914925456704763" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5206853530000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "811" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006489950196516646" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.526342823" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I16 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 11, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "768" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006567598033854167" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.010681410148370487" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006514065422428152" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006845472386064585" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "51510738416.09102" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "515107384160.9102" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5918053586407517" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5204057160000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "805" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006483509893002717" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.521930972" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 12, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 13, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 14, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 15, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1904" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002683022746848742" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0242735351289231" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026296673859117433" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.013347568443463995" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63799764524.90815" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "510398116199.2652" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5863948945304058" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.550977791" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2015" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002597611117303815" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523427203" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 16, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000382968452560241" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.016207309597693925" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00037759019212281323" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0077904180984165565" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44432340537.44468" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "533188086449.3362" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6125782243213881" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5365996590000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1396" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003753899079680784" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.524052128" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 17, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00038311062575301184" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.016679525157534167" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00037773971044155724" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00868318074979036" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44414753165.31679" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "532977037983.8014" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6123357513600659" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.53682523" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1367" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003756192367096059" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.513480453" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 18, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 19, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 20, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1904" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026897186554621826" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.02368215369603371" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026361388154327934" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012168663554835448" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "63643143152.328896" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "509145145218.63116" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5849553598559641" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5524873680000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1991" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026033155670242655" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5183284650000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 21, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=F32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 22, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003830804947289157" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01669586098492127" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003777089391846278" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008791389028513721" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44418371554.07946" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "533020458648.95355" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6123856372345514" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.536636844" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1404" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003754535468555244" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.527143864" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 23, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1328" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0003830802665662652" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01641959194869226" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00037772002358393717" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008238450770780798" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44417068072.833466" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "533004816874.0016" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6123676664453144" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.536685893" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1389" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000375325125357159" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.521334187" } ] } ], "is_skipped": false }, { "name": "Device=0 In=I64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 24, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 25, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 26, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 27, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=I64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 28, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=0 In=I64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 29, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2112" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002422139554924242" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.02400462331990132" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00023683451699572973" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007866889921134117" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35419701935.386604" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "566715230966.1857" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6510974620475479" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.556787641" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2225" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00023412507132198032" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5209357530000001" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 30, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 31, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 32, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 33, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=0 In=F64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 34, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2112" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00024282649337121185" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.024008982414037136" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002374703656091845" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008267454870626736" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "35324862445.386154" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "565197799126.1785" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6493540890695985" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5575585390000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2214" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00023455057945354847" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.519302479" } ] } ], "is_skipped": false }, { "name": "Device=0 In=F64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 0, "type_config_index": 35, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I8 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 0, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I8 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 1, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "992" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006824859284274195" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.032440596768964644" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006776485806030618" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.03134186379501587" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "99031955383.5376" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "297095866150.6128" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.40577997452826264" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.696101135" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "993" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0006594375443362513" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.6568292870000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 2, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "592" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008640237381756752" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01007729538332117" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008593635128156565" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008503073738172521" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78091358312.52779" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "390456791562.639" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5332943503641813" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522774643" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "615" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008578066445947664" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.527563349" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 3, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "656" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008614595929878056" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0098819558217369" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008567909279429336" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008239605399217536" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "78325834006.11096" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "391629170030.5548" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5348956102908583" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.577643271" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "657" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0008551398322462489" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.562343075" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 4, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "528" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014573860359848496" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006512604739977204" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014527478784774298" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00567750642698368" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "46194432629.517426" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "415749893665.65686" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5678402175284868" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.779591082" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "529" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014509199143357438" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.7691349590000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I8 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 5, "axis_values": [ { "name": "In", "type": "string", "value": "I8" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "536870912" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "352" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014603711335227268" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.006314392964515164" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014557048187337147" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005429185609780399" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "46100598923.87834" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "414905390314.905" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.5666867765446146" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.520741476" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "363" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0014527880455836777" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.527373104" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 6, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I16 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 7, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I16 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 8, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1104" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004611589565217395" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012940549047826096" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004564886665700571" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007894471093785426" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "73505509462.30429" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "441033056773.82574" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6023725097981667" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.530236417" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1149" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00045453528926308207" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5222721800000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 9, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1104" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004598074438405802" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012817089346835498" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004551599129116618" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.007723313560215716" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "73720094955.97276" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "442320569735.83655" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6041310229128012" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528748192" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1166" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00045311976542399224" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5283480380000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 10, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "672" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007533759211309518" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00846442255799797" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007486925714959693" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005711239714249503" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44817370009.36792" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "448173700093.6792" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6121253552415854" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.518973692" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "701" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007453038627853066" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5224711350000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I16 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 11, "axis_values": [ { "name": "In", "type": "string", "value": "I16" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "33554432" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "268435456" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "672" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007515454092261914" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.008392859436579346" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000746849381497928" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.005530757922319482" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "44927977221.72726" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "449279772217.2726" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6136360525257766" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.517848211" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "704" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0007438007701526989" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523646363" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 12, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 13, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 14, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 15, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1840" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00027774970760869537" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.01802281728547624" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00027309293929973365" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0057206437926147526" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "61434089226.254715" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "491472713810.0377" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6712640868253356" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.547010069" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1923" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002714794236300702" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.522066944" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 16, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1195" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00042314036485355624" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012144571947569476" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00041846681174872806" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.00470128993484091" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40092106539.79896" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481105278477.5875" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6571040188996771" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5286744840000001" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1263" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00041609304251410327" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.525537319" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 17, "axis_values": [ { "name": "In", "type": "string", "value": "I32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1195" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004230686694560669" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012123446476677473" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.000418410443611225" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004701276954550611" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40097507737.136955" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481170092845.6435" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6571925437686346" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528583919" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1258" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004162648114566773" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523674003" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F32 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 18, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F32 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 19, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F32 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 20, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1808" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00028176399834070837" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.021188068496395096" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002770714691914288" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012675642708623109" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "60551943687.88875" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "484415549503.11" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6616252588274557" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.544610159" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1911" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00027552812178056315" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.526544561" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F32 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 21, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=F32 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 22, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1195" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00042314520585774067" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.0121383962955979" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004184789956862957" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004742822949323976" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40090939265.62733" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481091271187.5279" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6570848874392591" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528606066" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1257" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00041616969385503683" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.523135939" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F32 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 23, "axis_values": [ { "name": "In", "type": "string", "value": "F32" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "16777216" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "134217728" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1195" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004232182125523013" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.012193893187009835" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00041854015763334633" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004766137974469347" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "40085080712.12928" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "481020968545.5514" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6569888665668042" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.528650917" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1253" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0004162007763399092" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5215109210000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=I64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 24, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 25, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 26, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 27, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=I64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 28, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." }, { "name": "Device=1 In=I64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 29, "axis_values": [ { "name": "In", "type": "string", "value": "I64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1909" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002666127674174964" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.018360326425103806" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026193604216705505" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004085453563964175" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32025405631.84502" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512406490109.5203" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6998558923043056" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.546196184" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1976" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002601132721070819" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.5139936970000001" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F64 Out=I8", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 30, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I8" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=I16", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 31, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I16" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=I32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 32, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=F32", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 33, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F32" } ], "summaries": null, "is_skipped": true, "skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)." }, { "name": "Device=1 In=F64 Out=I64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 34, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "I64" } ], "summaries": [ { "tag": "nv/element_count/Items", "name": "Items", "description": "Number of elements: Items", "data": [ { "name": "value", "type": "int64", "value": "8388608" } ] }, { "tag": "nv/gmem/reads/InSize", "name": "InSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/gmem/writes/OutSize", "name": "OutSize", "hint": "bytes", "data": [ { "name": "value", "type": "int64", "value": "67108864" } ] }, { "tag": "nv/cold/sample_size", "name": "Samples", "description": "Number of isolated kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "1910" } ] }, { "tag": "nv/cold/time/cpu/mean", "name": "CPU Time", "description": "Mean isolated kernel execution time (measured on host CPU)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.0002665624010471204" } ] }, { "tag": "nv/cold/time/cpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated CPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.018356743233435932" } ] }, { "tag": "nv/cold/time/gpu/mean", "name": "GPU Time", "description": "Mean isolated kernel execution time (measured with CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026189029580323475" } ] }, { "tag": "nv/cold/time/gpu/stdev/relative", "name": "Noise", "description": "Relative standard deviation of isolated GPU times", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.004090112866661142" } ] }, { "tag": "nv/cold/bw/item_rate", "name": "Elem/s", "description": "Number of input elements processed per second", "hint": "item_rate", "data": [ { "name": "value", "type": "float64", "value": "32030999752.287834" } ] }, { "tag": "nv/cold/bw/global/bytes_per_second", "name": "GlobalMem BW", "description": "Number of bytes read/written per second to the CUDA device's global memory", "hint": "byte_rate", "data": [ { "name": "value", "type": "float64", "value": "512495996036.60535" } ] }, { "tag": "nv/cold/bw/global/utilization", "name": "BWUtil", "description": "Global device memory utilization as a percentage of the device's peak bandwidth", "hint": "percentage", "data": [ { "name": "value", "type": "float64", "value": "0.6999781414398565" } ] }, { "tag": "nv/cold/walltime", "name": "Walltime", "description": "Walltime used for isolated measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.546475312" } ] }, { "tag": "nv/batch/sample_size", "name": "Samples", "description": "Number of batch kernel executions", "hint": "sample_size", "data": [ { "name": "value", "type": "int64", "value": "2007" } ] }, { "tag": "nv/batch/time/gpu/mean", "name": "Batch GPU", "description": "Mean batch kernel execution time (measured by CUDA events)", "hint": "duration", "data": [ { "name": "value", "type": "float64", "value": "0.00026003379042491905" } ] }, { "tag": "nv/batch/walltime", "name": "Walltime", "description": "Walltime used for batch measurements", "hint": "duration", "hide": "Hidden by default.", "data": [ { "name": "value", "type": "float64", "value": "0.521898494" } ] } ], "is_skipped": false }, { "name": "Device=1 In=F64 Out=F64", "min_samples": 10, "min_time": 0.5, "max_noise": 0.005, "skip_time": -1.0, "timeout": 15.0, "device": 1, "type_config_index": 35, "axis_values": [ { "name": "In", "type": "string", "value": "F64" }, { "name": "Out", "type": "string", "value": "F64" } ], "summaries": null, "is_skipped": true, "skip_reason": "Not a conversion: InputType == OutputType." } ] } ] }