Files
nvbench/scripts/test_ref.json
2022-01-11 17:55:36 -05:00

17525 lines
529 KiB
JSON

{
"meta": {
"argv": [
"bin/nvbench.example.axes",
"--json",
"/home/av/code/src/nvbench/scripts/test_ref.json"
],
"version": {
"json": {
"major": 1,
"minor": 0,
"patch": 0,
"string": "1.0.0"
},
"nvbench": {
"major": 0,
"minor": 1,
"patch": 0,
"string": "0.1.0",
"git_branch": "walltime_reports",
"git_sha": "348acbd6eb752a87e15c28fe1ad1cb827eaaadec",
"git_version": "old-cmake-63-g348acbd",
"git_is_dirty": false
}
}
},
"devices": [
{
"id": 0,
"name": "Quadro GV100",
"sm_version": 700,
"ptx_version": 700,
"sm_default_clock_rate": 1627000000,
"number_of_sms": 80,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 34086060032,
"global_memory_bus_peak_clock_rate": 850000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 870400000000,
"l2_cache_size": 6291456,
"shared_memory_per_sm": 98304,
"shared_memory_per_block": 49152,
"ecc_state": false
},
{
"id": 1,
"name": "Quadro GP100",
"sm_version": 600,
"ptx_version": 600,
"sm_default_clock_rate": 1442500000,
"number_of_sms": 56,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 17069309952,
"global_memory_bus_peak_clock_rate": 715000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 732160000000,
"l2_cache_size": 4194304,
"shared_memory_per_sm": 65536,
"shared_memory_per_block": 49152,
"ecc_state": false
}
],
"benchmarks": [
{
"name": "simple",
"index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": null,
"states": [
{
"name": "Device=0",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": null,
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010094458717434867"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005997663682735138"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010034715849794225"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005782350585973689"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.51435071"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001001475909284053"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524782268"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": null,
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010075622164328662"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004836642334083953"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010027443022431728"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00034308545348455907"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.512193993"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010014738126565483"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5247834060000001"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "single_float64_axis",
"index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "Duration",
"type": "float64",
"flags": "",
"values": [
{
"input_string": "0",
"description": "",
"value": 0.0
},
{
"input_string": "0.0001",
"description": "",
"value": 0.0001
},
{
"input_string": "0.0002",
"description": "",
"value": 0.0002
},
{
"input_string": "0.0003",
"description": "",
"value": 0.00030000000000000003
},
{
"input_string": "0.0004",
"description": "",
"value": 0.0004
},
{
"input_string": "0.0005",
"description": "",
"value": 0.0005
},
{
"input_string": "0.0006",
"description": "",
"value": 0.0006000000000000001
},
{
"input_string": "0.0007",
"description": "",
"value": 0.0007000000000000001
},
{
"input_string": "0.0008",
"description": "",
"value": 0.0008000000000000001
},
{
"input_string": "0.0009",
"description": "",
"value": 0.0009000000000000002
},
{
"input_string": "0.001",
"description": "",
"value": 0.0010000000000000002
}
]
}
],
"states": [
{
"name": "Device=0 Duration=0",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "127488"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "9.540251349146535e-06"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4435508787705211"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "3.9224058844425625e-06"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.14064817853323436"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "11.490547931"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "274905"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.8188127571551e-06"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.500083096"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "4853"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010853461796826674"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.05359830602702947"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010302987600936478"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00484111901842999"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.637141422"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "5092"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0001013762061275653"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.51621627"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0002",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2459"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020891169174461132"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.02717422799526722"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020340182381027155"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002406936807045068"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5674029660000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2582"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020172880307174672"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520873229"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0003",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.00030000000000000003"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1652"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003081868111380144"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.018240770684480382"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00030268341853095175"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0016523707958282026"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5447823540000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1736"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00030105657621462773"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5226434630000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0004",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0004"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1241"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00040852977276389983"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.013603343023457075"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00040306361880540617"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0012210042127847492"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.53335829"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1304"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00040140879812416123"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5234471270000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0005",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0005"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "994"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005089016619718308"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010853962612041912"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000503456178265558"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009750606561696034"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5268077360000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1044"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005017619516657686"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523849472"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0006",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0006000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "830"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006082555698795184"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009191209785025295"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000602735921345563"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008234812151490051"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522369137"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "872"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006010903174724053"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524159087"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0007",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0007000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "712"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000708571620786517"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007823433090894212"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007030903266721907"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007055254847806133"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5193877680000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "748"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007014426981064088"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524686893"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0008",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0008000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "623"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008089194157303374"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006828496858360085"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008034522826177895"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000611164680542835"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.516959448"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "654"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008017951428707951"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524383518"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0009",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0009000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "554"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009082872328519855"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006126265423787953"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009027800905360124"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00054941989913754"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.514815663"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "582"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009011235712320125"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524463788"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0010000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010086229759519048"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005485055388542774"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010031437666000492"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004923631784045008"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.513707802"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001001475909284053"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5247822560000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "153037"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "7.764162771094724e-06"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.5441551718680286"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "3.05725036652246e-06"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0422080578285922"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "15.000158798000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "369923"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.3516386844065532e-06"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5000379110000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "4880"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010714987602459019"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.04579843914769717"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010247656405124585"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.003070733813086406"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.623587079"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "5111"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010137617021268466"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5181448860000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0002",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2466"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002074915798864561"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.023038028738255983"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020283785226716245"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015459112691612667"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5598459640000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2588"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002017284788341021"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522084334"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0003",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.00030000000000000003"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1655"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00030685509425981873"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.015537955668238047"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00030217996281079384"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010230869145796749"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.539645641"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1737"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003010563825696243"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522946315"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0004",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0004"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1243"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004072019324215605"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.011695107558689763"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004025163378863194"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007675334678184685"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.529549118"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1304"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004014085172875527"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523448241"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0005",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0005"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "995"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005075862180904529"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009382010906172408"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005028912236343076"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000614146973517185"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523755894"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1044"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005017611916494552"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523849106"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0006",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0006000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "831"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000606902394705175"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007845243707907233"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006022033425301283"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005063896891280906"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.519957368"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "872"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006010888475890554"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524160638"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0007",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0007000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "712"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007072048665730335"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0066432216884127464"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000702561125326692"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000444186835693086"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.516785433"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "747"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007014415899274179"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523988754"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0008",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0008000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "623"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008075409711075438"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005835313968640737"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008028804110677048"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00038458153244696385"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.514684533"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "654"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008017937429818903"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524384532"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0009",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0009000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "555"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009069636108108111"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00524675883682327"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009022562016237966"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00035453453918318805"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.513753633"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "583"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009011228374919596"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525365383"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0010000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010072655711422854"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004687180507983469"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010025901990328623"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003187607548154873"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.511897986"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010014750939289121"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524783733"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "copy_sweep_grid_shape",
"index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "BlockSize",
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
},
{
"name": "NumBlocks",
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
}
],
"states": [
{
"name": "Device=0 BlockSize=2^6 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "78"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006491010679487182"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014400823428293225"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006485689823444072"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001177496193520018"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "10347220700.783287"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "82777765606.2663"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.09510313144102286"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.507885141"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "81"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006479606722608024"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524857216"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^8 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "656"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002171159740853659"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008002387413205372"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0021657661977337647"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007621859662189712"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "30986199743.177273"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "247889597945.41818"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.2847996299924382"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.437881264"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "657"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0021637841704410677"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.425925528"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^10 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "752"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010918482712765959"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012885667055169438"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010864888095158216"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.011885928967750255"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "61766732811.45538"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "494133862491.64307"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5677089412817591"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.8365534670000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "753"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010831134183156693"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.8183730920000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^6 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "231"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002170435731601731"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004358290289328953"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0021650726464919703"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.003568770929248165"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "30996125746.05075"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "247969005968.406"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.28489086163649585"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.50610783"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "243"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0021624730742026746"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5254883690000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^8 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "848"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001072975840801887"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010997227168594192"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001067600981103923"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009773228014049224"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "62859500120.174065"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "502876000961.3925"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5777527584574822"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.9275834220000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "849"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010646783151390692"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.907651745"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^10 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1456"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009655372026098907"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007865605432218092"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009601815831693899"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005554818166025829"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "69891846684.33807"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "559134773474.7046"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6423882967310485"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4363716210000002"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1457"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009584971580017669"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4059789770000002"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^6 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "976"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010651546700819676"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010462843606792348"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010597991125016906"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009173166661501059"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63322249668.22941"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "506577997345.83527"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5820059712153438"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.059860389"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "977"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010574653456130558"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.038207793"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^8 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1231"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009616476466287569"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007525336475207418"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009562607302014941"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004999666932127862"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "70178416702.1681"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "561427333617.3448"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6450222123361039"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.209430215"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1232"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009541545041486059"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.182675849"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^10 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "496"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001023795669354839"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.03114530461728092"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010184043220454653"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0306728390106973"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "65896091117.53555"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "527168728940.2844"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6056626021832312"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.518169856"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "542"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010072569899893336"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.54594076"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^6 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2244"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0066659496501782385"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012044246591117944"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0066612275798478"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012013908599240357"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "10074549052.04325"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "80596392416.346"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.11008029995676627"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "15.003487063000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2245"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006649818384514629"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "14.950437548000002"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^8 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "218"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022997498486238524"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0031075885812940247"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022950336933135985"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002319295976145478"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "29240905785.18147"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "233927246281.45175"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.319502904121301"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.505458689"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "228"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022943040278919956"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5231123440000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^10 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "426"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011787892863849767"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0053738632436882575"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011741032116289985"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.003574335492607712"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "57157550831.40471"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "457260406651.2377"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6245361760424466"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.510194696"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "448"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011726912089756558"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525375562"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^6 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "226"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002220062486725664"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0026283185437807914"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002215349671060005"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015099121235202378"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "30292673376.42893"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "242341387011.43143"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.33099511993475667"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5059807590000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "237"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002214404399887922"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524823987"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^8 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "544"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001132157450367647"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007421825838534079"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011274838826673863"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006150104794311432"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59520907599.348335"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "476167260794.7867"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6503595673005719"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.626143416"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "545"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011260322986392797"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6144447430000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^10 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "447"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011234373914988803"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0046157037787769705"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011187847153985792"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0019678845031191957"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59983715433.66298"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "479869723469.30383"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6554164710846042"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5105567950000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "471"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001116505875962049"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5258857730000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^6 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "448"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011217261607142856"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0051297852216839"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011170590700847755"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002961082814438008"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "60076378946.466095"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "480611031571.72876"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6564289657612117"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.510912252"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "471"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011151960174495754"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525268305"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^8 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "447"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011251578970917226"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005034162791339124"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011205025481964396"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0028024348992886213"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59891754916.5938"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "479134039332.7504"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6544116577425022"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.511320454"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "469"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011173903865854878"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524067611"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^10 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "474"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010600141455696206"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004975251378988354"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001055306057638257"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002145050602183929"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63591849505.90315"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "508734796047.2252"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6948410129578578"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.51139696"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001053948106173284"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5259308100000001"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "copy_type_sweep",
"index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "T",
"type": "type",
"flags": "",
"values": [
{
"input_string": "U8",
"description": "uint8_t",
"is_active": true
},
{
"input_string": "U16",
"description": "uint16_t",
"is_active": true
},
{
"input_string": "U32",
"description": "uint32_t",
"is_active": true
},
{
"input_string": "U64",
"description": "uint64_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
],
"states": [
{
"name": "Device=0 T=U8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U8"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "3008"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022984299517952063"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.024386082027668385"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002292998504448446"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.02420260340376021"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "117067436144.95683"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "234134872289.91367"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.2689968661419045"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "6.978260496000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "3009"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022799289537926114"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "6.889595338"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=U16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 1,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U16"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "352"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001447471568181817"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006559305677944603"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014420469982380224"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0053741666978658"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "93074447756.55377"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "372297791026.2151"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.4277318371165155"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.51661557"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "364"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014379604465358862"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5234251630000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=U32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 2,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "960"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010732170854166677"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010747135588535793"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010678389670948187"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009483040603054523"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "62845490816.44543"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "502763926531.5634"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5776239964746822"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.050184244"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "961"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010641954020828663"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.027618288"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=U64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 3,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1232"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009393459350649342"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008173389439931525"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009339579984352195"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005779340051383488"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35927131687.09736"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "574834106993.5577"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6604252148363485"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.1830953560000002"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1233"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009315095380951709"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.155529067"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 4,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "496"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001073113616935484"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01045196550212673"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010676812894882687"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009124433511281603"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "62854771981.7819"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "502838175854.2552"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5777093013031425"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.542738971"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "497"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010637711835818988"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528728784"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 5,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1232"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009395226306818184"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008519805595594534"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009341417393804389"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006275440558350422"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35920065002.399605"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "574721040038.3937"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6602953125441103"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.183265443"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1233"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009317267593676144"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.155949535"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U8"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2640"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002704848576515149"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008230990844947604"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002700116645206098"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008022518759096101"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "99416244285.81326"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "198832488571.62653"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.2715697232457749"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "7.193921508000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2641"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002695621145895508"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "7.143042223"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 1,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U16"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "330"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015221530787878797"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0057375488336673195"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015174821813901261"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004846658675573777"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "88447646796.77927"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "353790587187.11707"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.48321485356632027"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.508487203"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "347"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001516773410764139"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5263325360000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 2,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "704"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011323334801136371"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007608036417757464"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001127679999409751"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0063837058695073836"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59510556217.30105"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "476084449738.4084"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6502464621645656"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.810527479"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "705"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011263038268326028"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.796312915"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 3,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "478"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010527462217573217"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005281184054309557"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010480330030289645"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002728222354093458"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32016579538.070763"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512265272609.1322"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.699663014380917"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.51213991"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "500"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00104523095703125"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522627688"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 4,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "464"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011329571594827575"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00779514172858376"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011281946900075874"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006520518090102098"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59483407070.05869"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "475867256560.46954"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6499498150137532"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.534519304"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "467"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011258338421774624"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5257752330000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 5,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "478"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010524297447698746"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0053011542236172425"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010477156826142983"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0027421021970940066"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32026276361.802433"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512420421788.8389"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6998749204939343"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.511908565"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "501"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010452877204575224"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5236998340000001"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "copy_type_conversion_sweep",
"index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "In",
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
},
{
"name": "Out",
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
],
"states": [
{
"name": "Device=0 In=I8 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I8 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 1,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "992"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006600980292338716"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.06265755233269708"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006543757735841723"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.061480066936899634"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "102554016681.31558"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "307662050043.9468"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.35347202440710795"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6754950980000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "993"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006166902596256644"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.613870906"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 2,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "684"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007370927309941522"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008729576791697422"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007317126547558279"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004720480993831976"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "91714778422.67767"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "458573892113.3883"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5268541959023303"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5183597600000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "723"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007290440898383471"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5271070320000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 3,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "680"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007416632955882347"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00842788883853582"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007362919512917023"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004273389706237406"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "91144367234.04161"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "455721836170.20807"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5235774772176104"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.518484646"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "712"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007338700883843925"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522523862"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 4,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "528"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0012047073446969693"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009645329133519535"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011993323018153505"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0085331592060071"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "55955187647.6784"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "503596688829.1056"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5785807546290276"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.647215045"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "529"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001196625677083526"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6339911500000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 5,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1200"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001180585506666666"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010080936476778664"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001175182694693406"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008979393908657816"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "57105047838.97287"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "513945430550.75586"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5904703935555559"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4422983710000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1201"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011731425545594744"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4168893610000002"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 6,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I16 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 7,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I16 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 8,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1696"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00043055614622641435"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01653471806668262"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00042517247028157185"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010615851149343741"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78919578160.31331"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "473517468961.8799"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5440228273918657"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.766234603"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1697"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004232069438320117"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.7227836270000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 9,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1184"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00043288785641891876"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01493008601829662"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004275269453740999"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008146558252326382"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78484952499.63434"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "470909714997.8061"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5410267865324059"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.537378945"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1238"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000424921975574894"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.526063107"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 10,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "768"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006571356510416664"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010841823646108464"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006517510409466911"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007001797627972981"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "51483511175.15826"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "514835111751.5826"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5914925456704763"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5206853530000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "811"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006489950196516646"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.526342823"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 11,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "768"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006567598033854167"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010681410148370487"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006514065422428152"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006845472386064585"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "51510738416.09102"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "515107384160.9102"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5918053586407517"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5204057160000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "805"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006483509893002717"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.521930972"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 12,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 13,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 14,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 15,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1904"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002683022746848742"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0242735351289231"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026296673859117433"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.013347568443463995"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63799764524.90815"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "510398116199.2652"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5863948945304058"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.550977791"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2015"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002597611117303815"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523427203"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 16,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000382968452560241"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.016207309597693925"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00037759019212281323"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0077904180984165565"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44432340537.44468"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "533188086449.3362"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6125782243213881"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5365996590000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1396"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003753899079680784"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524052128"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 17,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00038311062575301184"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.016679525157534167"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00037773971044155724"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00868318074979036"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44414753165.31679"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "532977037983.8014"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6123357513600659"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.53682523"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1367"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003756192367096059"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.513480453"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 18,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 19,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 20,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1904"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026897186554621826"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.02368215369603371"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026361388154327934"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012168663554835448"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63643143152.328896"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "509145145218.63116"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5849553598559641"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5524873680000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1991"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026033155670242655"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5183284650000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 21,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=F32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 22,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003830804947289157"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01669586098492127"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003777089391846278"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008791389028513721"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44418371554.07946"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "533020458648.95355"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6123856372345514"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.536636844"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1404"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003754535468555244"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.527143864"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 23,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003830802665662652"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01641959194869226"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00037772002358393717"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008238450770780798"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44417068072.833466"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "533004816874.0016"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6123676664453144"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.536685893"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1389"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000375325125357159"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.521334187"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 24,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 25,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 26,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 27,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 28,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 29,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2112"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002422139554924242"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.02400462331990132"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00023683451699572973"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007866889921134117"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35419701935.386604"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "566715230966.1857"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6510974620475479"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.556787641"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2225"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00023412507132198032"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5209357530000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 30,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 31,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 32,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 33,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 34,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2112"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00024282649337121185"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.024008982414037136"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002374703656091845"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008267454870626736"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35324862445.386154"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "565197799126.1785"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6493540890695985"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5575585390000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2214"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00023455057945354847"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.519302479"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 35,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I8 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I8 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 1,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "992"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006824859284274195"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.032440596768964644"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006776485806030618"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.03134186379501587"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "99031955383.5376"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "297095866150.6128"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.40577997452826264"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.696101135"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "993"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006594375443362513"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6568292870000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 2,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "592"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008640237381756752"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01007729538332117"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008593635128156565"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008503073738172521"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78091358312.52779"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "390456791562.639"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5332943503641813"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522774643"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "615"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008578066445947664"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.527563349"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 3,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "656"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008614595929878056"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0098819558217369"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008567909279429336"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008239605399217536"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78325834006.11096"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "391629170030.5548"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5348956102908583"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.577643271"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "657"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008551398322462489"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.562343075"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 4,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "528"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014573860359848496"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006512604739977204"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014527478784774298"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00567750642698368"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "46194432629.517426"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "415749893665.65686"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5678402175284868"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.779591082"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "529"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014509199143357438"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.7691349590000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 5,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "352"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014603711335227268"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006314392964515164"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014557048187337147"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005429185609780399"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "46100598923.87834"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "414905390314.905"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5666867765446146"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520741476"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "363"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014527880455836777"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.527373104"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 6,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I16 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 7,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I16 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 8,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1104"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004611589565217395"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012940549047826096"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004564886665700571"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007894471093785426"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "73505509462.30429"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "441033056773.82574"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6023725097981667"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.530236417"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1149"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00045453528926308207"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5222721800000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 9,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1104"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004598074438405802"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012817089346835498"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004551599129116618"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007723313560215716"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "73720094955.97276"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "442320569735.83655"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6041310229128012"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528748192"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1166"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00045311976542399224"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5283480380000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 10,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "672"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007533759211309518"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00846442255799797"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007486925714959693"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005711239714249503"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44817370009.36792"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "448173700093.6792"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6121253552415854"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.518973692"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "701"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007453038627853066"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5224711350000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 11,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "672"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007515454092261914"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008392859436579346"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000746849381497928"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005530757922319482"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44927977221.72726"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "449279772217.2726"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6136360525257766"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.517848211"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "704"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007438007701526989"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523646363"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 12,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 13,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 14,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 15,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1840"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00027774970760869537"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01802281728547624"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00027309293929973365"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0057206437926147526"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "61434089226.254715"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "491472713810.0377"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6712640868253356"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.547010069"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1923"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002714794236300702"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522066944"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 16,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1195"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00042314036485355624"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012144571947569476"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00041846681174872806"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00470128993484091"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40092106539.79896"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481105278477.5875"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6571040188996771"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5286744840000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1263"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00041609304251410327"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525537319"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 17,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1195"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004230686694560669"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012123446476677473"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000418410443611225"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004701276954550611"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40097507737.136955"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481170092845.6435"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6571925437686346"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528583919"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1258"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004162648114566773"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523674003"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 18,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 19,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 20,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1808"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00028176399834070837"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.021188068496395096"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002770714691914288"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012675642708623109"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "60551943687.88875"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "484415549503.11"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6616252588274557"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.544610159"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1911"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00027552812178056315"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.526544561"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 21,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=F32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 22,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1195"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00042314520585774067"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0121383962955979"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004184789956862957"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004742822949323976"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40090939265.62733"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481091271187.5279"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6570848874392591"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528606066"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1257"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00041616969385503683"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523135939"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 23,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1195"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004232182125523013"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012193893187009835"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00041854015763334633"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004766137974469347"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40085080712.12928"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481020968545.5514"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6569888665668042"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528650917"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1253"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004162007763399092"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5215109210000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 24,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 25,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 26,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 27,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 28,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 29,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1909"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002666127674174964"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.018360326425103806"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026193604216705505"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004085453563964175"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32025405631.84502"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512406490109.5203"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6998558923043056"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.546196184"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1976"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002601132721070819"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5139936970000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 30,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 31,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 32,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 33,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 34,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1910"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002665624010471204"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.018356743233435932"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026189029580323475"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004090112866661142"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32030999752.287834"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512495996036.60535"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6999781414398565"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.546475312"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2007"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026003379042491905"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.521898494"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 35,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
}
]
}
]
}