Files
nvbench/scripts/test_ref.json

18798 lines
570 KiB
JSON

{
"devices": [
{
"id": 0,
"name": "NVIDIA Quadro GV100",
"sm_version": 700,
"ptx_version": 700,
"sm_default_clock_rate": 1627000000,
"number_of_sms": 80,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 34078982144,
"global_memory_bus_peak_clock_rate": 850000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 870400000000,
"l2_cache_size": 6291456,
"shared_memory_per_sm": 98304,
"shared_memory_per_block": 49152,
"ecc_state": false
},
{
"id": 1,
"name": "NVIDIA Quadro GP100",
"sm_version": 600,
"ptx_version": 600,
"sm_default_clock_rate": 1442500000,
"number_of_sms": 56,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 17069309952,
"global_memory_bus_peak_clock_rate": 715000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 732160000000,
"l2_cache_size": 4194304,
"shared_memory_per_sm": 65536,
"shared_memory_per_block": 49152,
"ecc_state": false
}
],
"benchmarks": [
{
"index": 0,
"name": "simple",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": null,
"states": {
"Device=0": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": null,
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "486"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010094132736625523"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005987183296179167"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010034002306039446"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005072701393681687"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001001473929135854"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
},
"Device=1": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": null,
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "488"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010074898913934418"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005542305355933818"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010027081287298028"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00035037919649082367"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010014748609703007"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
}
}
},
{
"index": 1,
"name": "single_float64_axis",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"Duration": {
"type": "float64",
"flags": "",
"values": [
{
"input_string": "0",
"description": "",
"value": 0.0
},
{
"input_string": "0.0001",
"description": "",
"value": 0.0001
},
{
"input_string": "0.0002",
"description": "",
"value": 0.0002
},
{
"input_string": "0.0003",
"description": "",
"value": 0.00030000000000000003
},
{
"input_string": "0.0004",
"description": "",
"value": 0.0004
},
{
"input_string": "0.0005",
"description": "",
"value": 0.0005
},
{
"input_string": "0.0006",
"description": "",
"value": 0.0006000000000000001
},
{
"input_string": "0.0007",
"description": "",
"value": 0.0007000000000000001
},
{
"input_string": "0.0008",
"description": "",
"value": 0.0008000000000000001
},
{
"input_string": "0.0009",
"description": "",
"value": 0.0009000000000000002
},
{
"input_string": "0.001",
"description": "",
"value": 0.0010000000000000002
}
]
}
},
"states": {
"Device=0 Duration=0": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "14061"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "9.102689638005845e-06"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.033946388108068055"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "3.7547417902904438e-06"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.12549022159970946"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "1.630773172830879e-06"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "306655"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0001": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "3835"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00010860168552803123"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004007949999262656"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00010303751935470811"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004789691009751296"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00010137620362095862"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "5088"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0002": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "2174"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00020898149126034966"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002070700973146156"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00020338884861017417"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002418204625044133"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002017283984223771"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2583"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0003": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.00030000000000000003"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1520"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00030825112500000015"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0014009307905580174"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00030272901975793895"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0016163896900565434"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003010571695496376"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1742"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0004": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0004"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1166"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004085718481989706"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0010690404823574895"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00040307120334734023"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0012226190019077351"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004014095938278854"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1304"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0005": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0005"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "945"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0005089798201058188"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0008530028319072816"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005034217145707861"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0009752402596440034"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005017619516657686"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1044"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0006": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0006000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "796"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006082355979899511"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0007134353357638104"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006027260286424639"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0008279817736951732"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006010891975612815"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "872"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0007": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0007000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "685"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007086865854014601"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006295331091145095"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007030571342384726"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0007151653876403053"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007014426981064088"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "748"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0008": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0008000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "602"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008090872425249167"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005562631850494214"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008034305715085621"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006219681072125149"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008017951428707951"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "654"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.0009": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0009000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "538"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009084568382899636"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005031047519089767"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009027937730448745"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005501738587938111"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009011217884181701"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "582"
}
}
},
"is_skipped": false
},
"Device=0 Duration=0.001": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0010000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "487"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010086481827515403"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0004266615566594544"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010031193825253714"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0004975122529595318"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001001475909284053"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "15089"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "8.108349592418312e-06"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.05461449121054022"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "3.271210544150035e-06"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.059765735669007766"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "1.3421442998656208e-06"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "372558"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0001": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "3944"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00010710262145030443"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004128650771669589"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00010247565930403145"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0030818570098060543"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00010137613820964433"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "5117"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0002": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "2193"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00020765215686274505"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0021648763590408093"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00020284258628946086"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0015447061481155045"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002017285137353667"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2584"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0003": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.00030000000000000003"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1537"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003068213201040992"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0013512096196898148"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00030219575751114794"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0010543163243715088"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003010567871656286"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1736"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0004": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0004"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1176"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00040721289880952437"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0010016437258221326"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004025331704186726"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0007456691947680211"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004014084236753499"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1304"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0005": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0005"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "951"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0005075412103049417"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.000846863074833117"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005028813449366248"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006292766848433991"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0005017613753177333"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1045"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0006": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0006000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "800"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006068351487499997"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006561812659454387"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006021752006560568"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.000511717182892197"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006010895299747637"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "873"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0007": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0007000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "690"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007071279246376804"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0005386426703062701"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007025530446266783"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00042821786377290075"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007014415557371741"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "748"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0008": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0008000000000000001"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "605"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008076996363636364"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0006014433173443102"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008029008409208492"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00036509958633429017"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008017936496559632"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "654"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.0009": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0009000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "540"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009070510574074071"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00046472458647248545"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009022252441556363"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00033895812399517745"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009011227322607926"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "582"
}
}
},
"is_skipped": false
},
"Device=1 Duration=0.001": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"Duration": {
"type": "float64",
"value": "0.0010000000000000002"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "488"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010073550901639342"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0004238073408932392"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010025966528986322"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0003136332645329908"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001001473929135854"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "524"
}
}
},
"is_skipped": false
}
}
},
{
"index": 2,
"name": "copy_sweep_grid_shape",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"BlockSize": {
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
},
"NumBlocks": {
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
}
},
"states": {
"Device=0 BlockSize=2^6 NumBlocks=2^6": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "71"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.007065658352112677"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.05358128799632556"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.007059958081849862"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.053589324741995806"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "9505561254.326319"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "76044490034.61055"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.08736729094049925"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.006475561071325232"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "81"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^8 NumBlocks=2^6": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "229"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0021687765283842793"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006699637202043051"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0021633964730141996"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00669331351204079"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "31020141170.19388"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "248161129361.55103"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.28511159163781136"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002160161503025743"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "244"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^10 NumBlocks=2^6": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "448"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010963011227678571"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013516109455086892"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001090899714667882"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013565950821979889"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "61516987398.26961"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "492135899186.15686"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5654134871164486"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010871857387360318"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "481"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^6 NumBlocks=2^8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "229"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.002169116519650655"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003880325099879575"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0021636720515755057"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0038733421374846436"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "31016190254.495274"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "248129522035.9622"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.2850752780744051"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0021606314702289093"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "243"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^8 NumBlocks=2^8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "456"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010761263311403508"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011961974879208899"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001070721754902288"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.012050980053815875"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "62676286993.08928"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "501410295944.71423"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5760688142747177"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010696770163143381"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "493"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^10 NumBlocks=2^8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "500"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.000980373466000001"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005650663121151804"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009750024316310896"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005680157515531913"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "68829432443.29456"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "550635459546.3564"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6326234599567514"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009731230225510264"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "542"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^6 NumBlocks=2^10": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "459"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010701848496732027"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008617668166839768"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010647455503218568"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008515610201608317"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "63028076501.20161"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "504224612009.61285"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5793021737242795"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010672177234327936"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "498"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^8 NumBlocks=2^10": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "500"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.000979696614"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0074291976714003565"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009743501433134098"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0074775515242700395"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "68875510986.00674"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "551004087888.054"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6330469759743267"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009717721991970888"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "541"
}
}
},
"is_skipped": false
},
"Device=0 BlockSize=2^10 NumBlocks=2^10": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "475"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010337088463157895"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.021637984186463816"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010282407758110449"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.02173209936637211"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "65265709723.54853"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "522125677788.38824"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.599868655547321"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010291563019039125"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "508"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^6 NumBlocks=2^6": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "76"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.006647754513157893"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0011430629751785044"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.006643085875009235"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0011389249175732911"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "10102061792.16473"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "80816494337.31784"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.11038091993186987"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.006639652300484573"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "79"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^8 NumBlocks=2^6": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "216"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.002300918597222223"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0022260554559899452"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0022963019234162794"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0022413389898784455"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "29224756255.11826"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "233798050040.94608"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.3193264450952607"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0022975726211280152"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "228"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^10 NumBlocks=2^6": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "418"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011795720191387577"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0035334409960244696"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001174919423874485"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0035420884521558988"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "57117843688.972115"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "456942749511.7769"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6241023130350974"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011729015622820172"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "448"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^6 NumBlocks=2^8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "224"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0022223120000000006"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0014441799301084402"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00221759328778301"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.001434325968668793"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "30262025218.83109"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "242096201750.6487"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.3306602405903747"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002216961359573623"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "236"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^8 NumBlocks=2^8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "435"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011336455977011492"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006534400600481561"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001129045183631195"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0065638034102788135"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59438599068.433075"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "475508792547.4646"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6494602170938929"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011272204485062364"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "466"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^10 NumBlocks=2^8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "256"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "437"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011265385652173912"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002220966435104119"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011218978122933775"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0022003475082832675"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59817269687.70571"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "478538157501.6457"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6535977894198614"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001119863004765959"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "468"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^6 NumBlocks=2^10": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "64"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "439"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011232369088838266"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00285184985884414"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011185731920403065"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0028360480110887457"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59995058416.86738"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "479960467334.939"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6555404110234635"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011161975045489451"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "468"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^8 NumBlocks=2^10": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "256"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "440"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011206702840909095"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002536479032620614"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011160453837026254"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0025536971451898373"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "60130945371.914566"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "481047562975.3165"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6570251898155001"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011138856279089096"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "470"
}
}
},
"is_skipped": false
},
"Device=1 BlockSize=2^10 NumBlocks=2^10": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"BlockSize": {
"type": "int64",
"value": "1024"
},
"NumBlocks": {
"type": "int64",
"value": "1024"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "464"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010597870474137931"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0020209648798997564"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010551077248207455"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002011195776784625"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "63603803120.10441"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "508830424960.83527"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6949716250011408"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010536742918941392"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "498"
}
}
},
"is_skipped": false
}
}
},
{
"index": 3,
"name": "copy_type_sweep",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"T": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "U8",
"description": "uint8_t",
"is_active": true
},
{
"input_string": "U16",
"description": "uint16_t",
"is_active": true
},
{
"input_string": "U32",
"description": "uint32_t",
"is_active": true
},
{
"input_string": "U64",
"description": "uint64_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
},
"states": {
"Device=0 T=U8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U8"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "217"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.002284935774193548"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.003019023225421965"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0022794654072704396"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0030185067855524154"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "117762460945.3669"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "235524921890.7338"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.2705938900399056"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0022792820785984846"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "231"
}
}
},
"is_skipped": false
},
"Device=0 T=U16": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U16"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "341"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0014459254017595295"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005620271181121053"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014404413371491634"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005659383776137258"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "93178197916.5051"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "372712791666.0204"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.4282086301309977"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014370339589576198"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "365"
}
}
},
"is_skipped": false
},
"Device=0 T=U32": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "456"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010763392214912279"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009580925422442722"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010708663173412028"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009602261983780735"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "62667825958.53892"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "501342607668.31134"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5759910474130415"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010690977880559816"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "489"
}
}
},
"is_skipped": false
},
"Device=0 T=U64": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "514"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009534325642023344"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007974682202520992"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009479809484593146"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008022855237026269"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "35395681795.64538"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "566330908730.326"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6506559153611283"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009457213474094653"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "554"
}
}
},
"is_skipped": false
},
"Device=0 T=F32": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "456"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010769479144736836"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011261863999383217"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001071445541946512"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011287071608158339"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "62633947664.836296"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "501071581318.69037"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5756796660370983"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001069358981385523"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "490"
}
}
},
"is_skipped": false
},
"Device=0 T=F64": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "514"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0009534943599221791"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006006780711077088"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009480226613900089"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00602313677626831"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "35394124388.125755"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "566305990210.0121"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6506272865464293"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0009457029259723165"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "552"
}
}
},
"is_skipped": false
},
"Device=1 T=U8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U8"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "184"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00270240325"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0033226300614619185"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.002697714079981265"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0033217171224860604"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "99504783695.18842"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "199009567390.37683"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.2718115813351956"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0026982716095753207"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "195"
}
}
},
"is_skipped": false
},
"Device=1 T=U16": {
"device": 1,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U16"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "325"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0015216281538461547"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0046556036312148845"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0015169812690294725"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004682337277211795"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "88476852509.76712"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "353907410039.0685"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.4833744127500389"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0015158526066057275"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "347"
}
}
},
"is_skipped": false
},
"Device=1 T=U32": {
"device": 1,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "435"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011331533540229887"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006418753103730108"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011284679349811587"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0064621372230947265"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59469003876.588196"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "475752031012.70557"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6497924374627206"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011265910963430138"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "467"
}
}
},
"is_skipped": false
},
"Device=1 T=U64": {
"device": 1,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "U64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "468"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010515641474358975"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002762541639974713"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001046885606570122"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002764528097772722"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32051670009.99595"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512826720159.9352"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.7004298516170443"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001044835600653889"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "503"
}
}
},
"is_skipped": false
},
"Device=1 T=F32": {
"device": 1,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "435"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011328659609195397"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006308260028809877"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011281658846756504"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.006329740046854081"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "59484925853.163795"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "475879406825.31036"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6499664101088701"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011261699270694815"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "470"
}
}
},
"is_skipped": false
},
"Device=1 T=F64": {
"device": 1,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"T": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "468"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0010518281880341881"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.002638709647720786"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010471613009770718"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0026399350413532966"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32043231514.27718"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512691704228.4349"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.7002454439308824"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0010447449703140563"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "502"
}
}
},
"is_skipped": false
}
}
},
{
"index": 4,
"name": "copy_type_conversion_sweep",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"devices": [
0,
1
],
"axes": {
"In": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
},
"Out": {
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
},
"states": {
"Device=0 In=I8 Out=I8": {
"device": 0,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I8 Out=I16": {
"device": 0,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "775"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006248230980645156"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0027640779893251216"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006193935315070645"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0028186397219177456"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "108346084655.93024"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "325038253967.7907"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.37343549398873016"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006171660299862132"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "850"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=I32": {
"device": 0,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "660"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007372658136363634"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004348049843468552"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007317814296845251"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004351029775591727"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "91706158803.36154"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "458530794016.8077"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5268046806259279"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007299521218782687"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "717"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=F32": {
"device": 0,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "656"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.000742387521341463"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0041525675601748364"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007369443420775064"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004193469264853706"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "91063680346.35373"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "455318401731.7686"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5231139725778592"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007352807822347689"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "714"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=I64": {
"device": 0,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "407"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0012095483882063889"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009732185124544102"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001204128551248836"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009798212399727946"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "55732308589.8092"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "501590777308.2828"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.576276168782494"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0012017273091491842"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "429"
}
}
},
"is_skipped": false
},
"Device=0 In=I8 Out=F64": {
"device": 0,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "415"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0011847366168674703"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011261383409993239"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011792877487389432"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011302242538631406"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "56906267424.351715"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512156406819.16547"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5884149894521662"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0011767830588600852"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "440"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=I8": {
"device": 0,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I16 Out=I16": {
"device": 0,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I16 Out=I32": {
"device": 0,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1105"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00043142517375565617"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.01116818587784149"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00042600826737028365"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.011332580467569093"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78764743715.25449"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "472588462291.5269"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5429554943606697"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00042359266142467694"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1238"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=F32": {
"device": 0,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1102"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00043289838384754937"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008465395678081931"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00042745939692221985"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008617999240612035"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78497354933.81969"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "470984129602.9181"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5411122812533525"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00042536910129233627"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1229"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=I64": {
"device": 0,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "734"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006609588569482289"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007896476276327823"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006555628124472239"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007981909890800989"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "51184160179.466095"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "511841601794.66095"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5880533108854101"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006538430490801411"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "806"
}
}
},
"is_skipped": false
},
"Device=0 In=I16 Out=F64": {
"device": 0,
"type_config_index": 11,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "734"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006605395899182562"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007740408518735753"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006550883051485072"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007833851008491804"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "51221234963.72489"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512212349637.2489"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5884792619913246"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006534532250824923"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "805"
}
}
},
"is_skipped": false
},
"Device=0 In=I32 Out=I8": {
"device": 0,
"type_config_index": 12,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I32 Out=I16": {
"device": 0,
"type_config_index": 13,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I32 Out=I32": {
"device": 0,
"type_config_index": 14,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I32 Out=F32": {
"device": 0,
"type_config_index": 15,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1735"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00026702492853025945"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.01324576727299336"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00026161364844278195"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013450268523907918"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "64129742847.37816"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "513037942779.02527"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5894277835236963"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00025957003988639885"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2015"
}
}
},
"is_skipped": false
},
"Device=0 In=I32 Out=I64": {
"device": 0,
"type_config_index": 16,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1234"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003841953128038892"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008873245446388355"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003788044850192556"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008932234099031263"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44289908550.44172"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531478902605.3006"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6106145480299869"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037766468619885956"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1381"
}
}
},
"is_skipped": false
},
"Device=0 In=I32 Out=F64": {
"device": 0,
"type_config_index": 17,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1235"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003840312064777327"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009389520289783196"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037863498520754796"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009545097161422792"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44309735379.58624"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531716824555.03485"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6108878958582662"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003773968978051128"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1396"
}
}
},
"is_skipped": false
},
"Device=0 In=F32 Out=I8": {
"device": 0,
"type_config_index": 18,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F32 Out=I16": {
"device": 0,
"type_config_index": 19,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F32 Out=I32": {
"device": 0,
"type_config_index": 20,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1726"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00026856249884125153"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.01342456387766187"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00026315643022814674"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.013724796519135959"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "63753775598.24316"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "510030204785.94525"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5859722021897349"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002609094005709575"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2047"
}
}
},
"is_skipped": false
},
"Device=0 In=F32 Out=F32": {
"device": 0,
"type_config_index": 21,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=F32 Out=I64": {
"device": 0,
"type_config_index": 22,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1235"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003840352834008098"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009209302867708775"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037863381922486526"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009434239106344595"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44309871829.05669"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531718461948.68024"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6108897770550095"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037729541193829834"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1370"
}
}
},
"is_skipped": false
},
"Device=0 In=F32 Out=F64": {
"device": 0,
"type_config_index": 23,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1233"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0003844534225466336"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009387088977698597"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00037907109053659035"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009568452852068391"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44258758894.67376"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "531105106736.0851"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6101850950552448"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0003776787067281789"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1392"
}
}
},
"is_skipped": false
},
"Device=0 In=I64 Out=I8": {
"device": 0,
"type_config_index": 24,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=I16": {
"device": 0,
"type_config_index": 25,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=I32": {
"device": 0,
"type_config_index": 26,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=F32": {
"device": 0,
"type_config_index": 27,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=I64 Out=I64": {
"device": 0,
"type_config_index": 28,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=0 In=I64 Out=F64": {
"device": 0,
"type_config_index": 29,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1865"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002468652632707771"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008794568336063534"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002414397094508553"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009088437943671243"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "34744110730.913086"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "555905771694.6094"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6386785060829612"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00023926271107803853"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2168"
}
}
},
"is_skipped": false
},
"Device=0 In=F64 Out=I8": {
"device": 0,
"type_config_index": 30,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=I16": {
"device": 0,
"type_config_index": 31,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=I32": {
"device": 0,
"type_config_index": 32,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=F32": {
"device": 0,
"type_config_index": 33,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=0 In=F64 Out=I64": {
"device": 0,
"type_config_index": 34,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1861"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002474318479312196"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009416123268532244"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00024199313163148308"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.009609928378243537"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "34664653262.864136"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "554634452205.8262"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6372178908614731"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00024011272523290366"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2177"
}
}
},
"is_skipped": false
},
"Device=0 In=F64 Out=F64": {
"device": 0,
"type_config_index": 35,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I8 Out=I8": {
"device": 1,
"type_config_index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I8 Out=I16": {
"device": 1,
"type_config_index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "715"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0006812909104895107"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.029682520209047932"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0006765060471488043"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.029725089166496972"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "99199207875.28265"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "297597623625.84796"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.4064652857651988"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.000659287437142213"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "797"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=I32": {
"device": 1,
"type_config_index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "566"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008641483356890464"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00815440605473416"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008593197461783684"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008178118032486274"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78095335640.14047"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "390476678200.70233"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5333215119655572"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008574365556141886"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "607"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=F32": {
"device": 1,
"type_config_index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "568"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0008621727816901408"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008348927642653206"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.000857566987334842"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.008407666935430734"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "78254952663.88672"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "391274763319.4336"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5344115539218662"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0008559337940091401"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "612"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=I64": {
"device": 1,
"type_config_index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "339"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0014581254159292036"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005934832249204677"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.001453499562620765"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005963799027107206"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "46170542961.153595"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "415534886650.3824"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5675465562860337"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014501432381838642"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "361"
}
}
},
"is_skipped": false
},
"Device=1 In=I8 Out=F64": {
"device": 1,
"type_config_index": 5,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I8"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "536870912"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "339"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0014608549616519177"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005454444454530878"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014561624537527042"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0054738241927221685"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "46086110671.96002"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "414774996047.64026"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.5665086812276555"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0014524769206623453"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "364"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=I8": {
"device": 1,
"type_config_index": 6,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I16 Out=I16": {
"device": 1,
"type_config_index": 7,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I16 Out=I32": {
"device": 1,
"type_config_index": 8,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1042"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00046152389539347375"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007516961198942111"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004568425950928514"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0075614567935713"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "73448562722.52853"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "440691376335.17114"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6019058352479938"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00045486935942230756"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1156"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=F32": {
"device": 1,
"type_config_index": 9,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1047"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00045967601432664773"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007580415029008197"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00045502618507418957"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.007602410404504316"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "73741760585.77625"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "442450563514.6575"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6043085712339618"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004530724069916505"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1164"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=I64": {
"device": 1,
"type_config_index": 10,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "648"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007539600570987655"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005701338376763893"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.000749293333218421"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005730659247124155"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44781436738.365845"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "447814367383.65845"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6116345708365091"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007462590063859665"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "701"
}
}
},
"is_skipped": false
},
"Device=1 In=I16 Out=F64": {
"device": 1,
"type_config_index": 11,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I16"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "33554432"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "268435456"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "650"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0007515365646153841"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005320261152122883"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007468673968315132"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00533121216008688"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "44926893505.259796"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "449268935052.59796"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6136212508913325"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0007440289011028757"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "702"
}
}
},
"is_skipped": false
},
"Device=1 In=I32 Out=I8": {
"device": 1,
"type_config_index": 12,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I32 Out=I16": {
"device": 1,
"type_config_index": 13,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I32 Out=I32": {
"device": 1,
"type_config_index": 14,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I32 Out=F32": {
"device": 1,
"type_config_index": 15,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1688"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00027765218187203764"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005690620491369388"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00027302053109941316"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.005713997774637474"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "61450382256.75059"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "491603058054.0047"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6714421138193901"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00027140032503120137"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1928"
}
}
},
"is_skipped": false
},
"Device=1 In=I32 Out=I64": {
"device": 1,
"type_config_index": 16,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1134"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.000422905379188712"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004818481737573335"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004182333121013812"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004829428135064118"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40114489961.844894"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "481373879542.13873"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6574708800564614"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004160488643510754"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1267"
}
}
},
"is_skipped": false
},
"Device=1 In=I32 Out=F64": {
"device": 1,
"type_config_index": 17,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1132"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004233320008833917"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004685003714910728"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041865130761381596"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004676709118042214"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40074438309.11453"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "480893259709.37445"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6568144390698405"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041636213471617884"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1264"
}
}
},
"is_skipped": false
},
"Device=1 In=F32 Out=I8": {
"device": 1,
"type_config_index": 18,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F32 Out=I16": {
"device": 1,
"type_config_index": 19,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F32 Out=I32": {
"device": 1,
"type_config_index": 20,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1665"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002817099831831833"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.012603278274487326"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002770048382224978"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.0127786417628205"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "60566508901.63906"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "484532071213.1125"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6617844067049723"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002751834324989535"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1941"
}
}
},
"is_skipped": false
},
"Device=1 In=F32 Out=F32": {
"device": 1,
"type_config_index": 21,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=F32 Out=I64": {
"device": 1,
"type_config_index": 22,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1133"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0004230943777581643"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004719817832949844"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041844157444515244"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004751688895767683"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40094524599.393234"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "481134295192.7188"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.657143650558237"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0004160357588015425"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1252"
}
}
},
"is_skipped": false
},
"Device=1 In=F32 Out=F64": {
"device": 1,
"type_config_index": 23,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F32"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "16777216"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "134217728"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1132"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00042342536395759757"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004748798224952708"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041871643782504436"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004750041166889743"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "40068204838.45002"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "480818458061.40027"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6567122733574632"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00041632065453087554"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "1252"
}
}
},
"is_skipped": false
},
"Device=1 In=I64 Out=I8": {
"device": 1,
"type_config_index": 24,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=I16": {
"device": 1,
"type_config_index": 25,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=I32": {
"device": 1,
"type_config_index": 26,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=F32": {
"device": 1,
"type_config_index": 27,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=I64 Out=I64": {
"device": 1,
"type_config_index": 28,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
"Device=1 In=I64 Out=F64": {
"device": 1,
"type_config_index": 29,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "I64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1753"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.0002666450433542495"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004046628770937376"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00026198611749762206"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004009600477982423"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32019284380.88381"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512308550094.1409"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6997221237081251"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00026007244216493403"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2008"
}
}
},
"is_skipped": false
},
"Device=1 In=F64 Out=I8": {
"device": 1,
"type_config_index": 30,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I8"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=I16": {
"device": 1,
"type_config_index": 31,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I16"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=I32": {
"device": 1,
"type_config_index": 32,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=F32": {
"device": 1,
"type_config_index": 33,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F32"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
"Device=1 In=F64 Out=I64": {
"device": 1,
"type_config_index": 34,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "I64"
}
},
"summaries": {
"Element count: Items": {
"short_name": {
"type": "string",
"value": "Items"
},
"value": {
"type": "int64",
"value": "8388608"
}
},
"Input Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "InSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Output Buffer Size: ": {
"hint": {
"type": "string",
"value": "bytes"
},
"short_name": {
"type": "string",
"value": "OutSize"
},
"value": {
"type": "int64",
"value": "67108864"
}
},
"Number of Samples (Cold)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Samples"
},
"description": {
"type": "string",
"value": "Number of kernel executions in cold time measurements."
},
"value": {
"type": "int64",
"value": "1753"
}
},
"Average CPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "CPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time observed from host."
},
"value": {
"type": "float64",
"value": "0.00026657142213348556"
}
},
"CPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold CPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.004288873685096382"
}
},
"Average GPU Time (Cold)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "GPU Time"
},
"description": {
"type": "string",
"value": "Average isolated kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.0002619141041552483"
}
},
"GPU Relative Standard Deviation (Cold)": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "Noise"
},
"description": {
"type": "string",
"value": "Relative standard deviation of the cold GPU execution time measurements."
},
"value": {
"type": "float64",
"value": "0.00422427515777647"
}
},
"Element Throughput": {
"hint": {
"type": "string",
"value": "item_rate"
},
"short_name": {
"type": "string",
"value": "Elem/s"
},
"description": {
"type": "string",
"value": "Number of input elements handled per second."
},
"value": {
"type": "float64",
"value": "32028088090.39048"
}
},
"Average Global Memory Throughput": {
"hint": {
"type": "string",
"value": "byte_rate"
},
"short_name": {
"type": "string",
"value": "GlobalMem BW"
},
"description": {
"type": "string",
"value": "Number of bytes read/written per second to the CUDA device's global memory."
},
"value": {
"type": "float64",
"value": "512449409446.2477"
}
},
"Percent Peak Global Memory Throughput": {
"hint": {
"type": "string",
"value": "percentage"
},
"short_name": {
"type": "string",
"value": "BWPeak"
},
"description": {
"type": "string",
"value": "Global device memory throughput as a percentage of the device's peak bandwidth."
},
"value": {
"type": "float64",
"value": "0.6999145124648269"
}
},
"Average GPU Time (Batch)": {
"hint": {
"type": "string",
"value": "duration"
},
"short_name": {
"type": "string",
"value": "Batch GPU"
},
"description": {
"type": "string",
"value": "Average back-to-back kernel execution time as measured by CUDA events."
},
"value": {
"type": "float64",
"value": "0.00026007216520352087"
}
},
"Number of Samples (Batch)": {
"hint": {
"type": "string",
"value": "sample_size"
},
"short_name": {
"type": "string",
"value": "Batch"
},
"description": {
"type": "string",
"value": "Number of kernel executions in hot time measurements."
},
"value": {
"type": "int64",
"value": "2013"
}
}
},
"is_skipped": false
},
"Device=1 In=F64 Out=F64": {
"device": 1,
"type_config_index": 35,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 0.5,
"axis_values": {
"In": {
"type": "string",
"value": "F64"
},
"Out": {
"type": "string",
"value": "F64"
}
},
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
}
}
}
]
}