Files
nvbench/python/scripts/test_cmp.json
Nader Al Awar 5e7adc5c3f Build multi architecture cuda wheels (#302)
* Add cuda architectures to build wheel for

* Package scripts in wheel

* Separate cuda major version extraction to fix architecutre selection logic

* Add back statement printing cuda version

* [pre-commit.ci] auto code formatting

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2026-01-29 01:13:24 +00:00

17525 lines
529 KiB
JSON

{
"meta": {
"argv": [
"bin/nvbench.example.axes",
"--json",
"/home/av/code/src/nvbench/scripts/test_cmp.json"
],
"version": {
"json": {
"major": 1,
"minor": 0,
"patch": 0,
"string": "1.0.0"
},
"nvbench": {
"major": 0,
"minor": 1,
"patch": 0,
"string": "0.1.0",
"git_branch": "walltime_reports",
"git_sha": "348acbd6eb752a87e15c28fe1ad1cb827eaaadec",
"git_version": "old-cmake-63-g348acbd",
"git_is_dirty": false
}
}
},
"devices": [
{
"id": 0,
"name": "Quadro GV100",
"sm_version": 700,
"ptx_version": 700,
"sm_default_clock_rate": 1627000000,
"number_of_sms": 80,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 34086060032,
"global_memory_bus_peak_clock_rate": 850000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 870400000000,
"l2_cache_size": 6291456,
"shared_memory_per_sm": 98304,
"shared_memory_per_block": 49152,
"ecc_state": false
},
{
"id": 1,
"name": "Quadro GP100",
"sm_version": 600,
"ptx_version": 600,
"sm_default_clock_rate": 1442500000,
"number_of_sms": 56,
"max_blocks_per_sm": 32,
"max_threads_per_sm": 2048,
"max_threads_per_block": 1024,
"registers_per_sm": 65536,
"registers_per_block": 65536,
"global_memory_size": 17069309952,
"global_memory_bus_peak_clock_rate": 715000000,
"global_memory_bus_width": 4096,
"global_memory_bus_bandwidth": 732160000000,
"l2_cache_size": 4194304,
"shared_memory_per_sm": 65536,
"shared_memory_per_block": 49152,
"ecc_state": false
}
],
"benchmarks": [
{
"name": "simple",
"index": 0,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": null,
"states": [
{
"name": "Device=0",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": null,
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001009524801603207"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006144561739025865"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010034006580799991"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005237510233783218"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.514396598"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001001475909284053"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524788153"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": null,
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010077174468937882"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00494341955894122"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010027929121602258"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00033287816568109313"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5123603010000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010014740456151597"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524795703"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "single_float64_axis",
"index": 1,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "Duration",
"type": "float64",
"flags": "",
"values": [
{
"input_string": "0",
"description": "",
"value": 0.0
},
{
"input_string": "0.0001",
"description": "",
"value": 0.0001
},
{
"input_string": "0.0002",
"description": "",
"value": 0.0002
},
{
"input_string": "0.0003",
"description": "",
"value": 0.00030000000000000003
},
{
"input_string": "0.0004",
"description": "",
"value": 0.0004
},
{
"input_string": "0.0005",
"description": "",
"value": 0.0005
},
{
"input_string": "0.0006",
"description": "",
"value": 0.0006000000000000001
},
{
"input_string": "0.0007",
"description": "",
"value": 0.0007000000000000001
},
{
"input_string": "0.0008",
"description": "",
"value": 0.0008000000000000001
},
{
"input_string": "0.0009",
"description": "",
"value": 0.0009000000000000002
},
{
"input_string": "0.001",
"description": "",
"value": 0.0010000000000000002
}
]
}
],
"states": [
{
"name": "Device=0 Duration=0",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "127632"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "9.535606282123409e-06"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4448218958078975"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "3.918024581663389e-06"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.14066541529910018"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "11.513563003000002"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "274328"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.8226457245237315e-06"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.500101118"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "4853"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010851134411704107"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.053377272961503276"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010302993536069301"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004807683479660842"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6373502280000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "5088"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010137620362095862"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.51581551"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0002",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2459"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002088847271248475"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.027095357105136896"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020339123081777852"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002426402384835198"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5670174410000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2582"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020172880307174672"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520878249"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0003",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.00030000000000000003"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1652"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003082859001210656"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01842186373388549"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003027270989578126"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0016270299573856555"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.544737606"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1736"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00030105657621462773"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522648918"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0004",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0004"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1241"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00040859692667203864"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.013800282471048258"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004030542842665574"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0012342926945401174"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.533285391"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1304"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00040140879812416123"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5234506480000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0005",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0005"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "994"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005090076327967808"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01104211789520747"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005034694101968762"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000960945456149481"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.526845475"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1044"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005017609577982818"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5238518600000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0006",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0006000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "830"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006083229987951809"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009259805546541143"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006027641820620359"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008125705181484989"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.52231507"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "872"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006010903174724053"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5241642790000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0007",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0007000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "712"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007086338553370777"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007928264539185437"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007030805292424196"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007112507950799924"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.519468829"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "748"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007014426981064088"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.52469385"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0008",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0008000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "623"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008089985730337072"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006971030802740222"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008034196651957732"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006306208005906063"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5170688250000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "654"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008017951428707951"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5243872230000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.0009",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0009000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "554"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009083576299638984"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006199510137107782"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009027842496276245"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005444417680564487"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.514841552"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "582"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009011235712320125"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524466611"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 Duration=0.001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0010000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010087251282565122"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005573661860035435"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010031565917517711"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004852012011897464"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5138195830000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010014756763254413"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524785882"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "153013"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "7.705666139478051e-06"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.5262458153177543"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "3.057407826310601e-06"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.045574170376734044"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "15.000211589000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "369906"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.3516989302429717e-06"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.500042922"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "4879"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010713845111703245"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.045460323768744995"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010249834163043719"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0030010311127595573"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6230727620000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "5081"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00010137619922490036"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5151083350000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0002",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2465"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020751516592292123"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.023007065837400455"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020286964052951872"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014997658908938753"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.559679316"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2588"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00020172862033755555"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522088477"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0003",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.00030000000000000003"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1655"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003068471528700908"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.015443555151131"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00030220268294890517"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010498159491600372"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.539562934"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1736"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003010567520071284"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.52264897"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0004",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0004"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1243"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00040717730973451277"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.011630958382375049"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00040252058700697966"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007711533484593173"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5294895540000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1305"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004014086726981561"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.52385337"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0005",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0005"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "995"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005075514221105535"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009291726931158024"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005029017407690461"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006019586171273846"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523705419"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1044"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005017608408726951"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5238529980000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0006",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0006000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "831"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006068636666666669"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00775159368655319"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000602217434115358"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0005211064062823375"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5198631410000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "873"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006010892503176905"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524771732"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0007",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0007000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "712"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007072028300561799"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006649464561878749"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007025522259848826"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004304629385174026"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.516796464"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "748"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007014422085195939"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524693347"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0008",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0008000000000000001"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "623"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008076071910112361"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005864235047342223"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008029232501600935"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000386286201448909"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.514722272"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "655"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008017945267771947"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5251914990000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.0009",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0009000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "555"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009069257099099103"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005199849951312571"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009022579880448067"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000339409683584611"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.513695142"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "582"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00090112220790378"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5244711550000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 Duration=0.001",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "Duration",
"type": "float64",
"value": "0.0010000000000000002"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "499"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010072258977955914"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004633193202486146"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010026042473340073"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003115372302150914"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.511907711"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "524"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010014748609703007"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524787242"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "copy_sweep_grid_shape",
"index": 2,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "BlockSize",
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
},
{
"name": "NumBlocks",
"type": "int64",
"flags": "pow2",
"values": [
{
"input_string": "6",
"description": "2^6 = 64",
"value": 64
},
{
"input_string": "8",
"description": "2^8 = 256",
"value": 256
},
{
"input_string": "10",
"description": "2^10 = 1024",
"value": 1024
}
]
}
],
"states": [
{
"name": "Device=0 BlockSize=2^6 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "78"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00648948455128205"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015111507522308748"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006484057010748448"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0012531664584969381"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "10349826333.845528"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "82798610670.76422"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.09512708027431552"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5077619640000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "81"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006481402361834491"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5250069540000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^8 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "672"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00217197076636905"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0074534188597851336"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002166515097376846"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00701989634431853"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "30975488738.229183"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "247803909905.83347"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.2847011832557829"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.473432187"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "673"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002163565506021122"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4606610070000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^10 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "688"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010916693808139535"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01306842599006877"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001086250233269015"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012078568140597113"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "61780298815.71512"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "494242390525.72095"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.567833628820911"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.765170478"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "689"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010836307621832676"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.748836308"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^6 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "231"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002171097186147186"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00443956157556455"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0021655962921324217"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.003655102168422409"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "30988630819.05223"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "247909046552.41785"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.28482197443981827"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.506240788"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "243"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002161031840760031"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525142297"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^8 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "736"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010725499320652177"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.011413433377036444"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001067108783223058"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010207282915832727"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "62888493708.49215"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "503107949667.9372"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5780192436442293"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.804676228"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "737"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010644761438770877"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.787106834"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^10 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1488"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009658611908602143"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007916411658808452"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009604295065966908"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005543866520742756"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "69873804937.337"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "558990439498.696"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.642222471850524"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.468436431"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1489"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009584573153443874"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4370937890000002"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^6 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "528"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010655318598484856"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010411330423168705"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010600458776408978"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009061611791593436"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63307509057.38994"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "506460072459.1195"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.581870487659834"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.573568937"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "529"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010571805049431119"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.559696812"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^8 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1032"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009617264147286825"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007568406287684157"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009562815504018629"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0049994946059467"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "70176888774.8577"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "561415110198.8616"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6450081688865598"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.013943156"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1033"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009539899551395297"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.9906108020000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 BlockSize=2^10 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "560"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010253841303571433"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.03170958999602246"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010199987426400187"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.031247624116965786"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "65793085025.09035"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "526344680200.7228"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6047158550100216"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5859060660000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "561"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010093532926046065"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5665410750000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^6 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2245"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0066631781487750605"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010029284827333777"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006658390919190473"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009997863006145854"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "10078841091.558964"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "80630728732.47171"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.11012719724168449"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "15.004037418000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2246"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006649344001406553"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "14.956094204000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^8 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "218"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002299290371559632"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0030536390935653273"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002294595665887955"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002251444609090054"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "29246487735.359"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "233971901882.872"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.3195638957097793"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5053657460000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "228"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002293837965580455"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523009414"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^10 NumBlocks=2^6",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "426"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001178968861502347"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005201397517740588"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011742734310212829"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0033150798656458847"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "57149265432.69776"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "457194123461.5821"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6244456450251067"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5103190240000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "450"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011726047092013889"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.527687784"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^6 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "226"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002219887185840708"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0026061881909203283"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002215178051881032"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015054811845863602"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "30295020277.49602"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "242360162219.96817"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.33102076352159115"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.506002865"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "237"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022142488503757913"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524792129"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^8 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "448"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011318572321428575"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007609392746712896"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011271811462938788"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0063649890038617206"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59536893622.33475"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "476295148978.678"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6505342397545317"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5155772900000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "470"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011258403372257314"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.529160021"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^10 NumBlocks=2^8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "256"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "447"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011232368366890376"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004611911863103576"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001118592285736562"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0019864118812352185"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59994034337.37313"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "479952274698.98505"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.655529221343675"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.510514425"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "470"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011164527406083776"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.52474862"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^6 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "64"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "448"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011216608169642855"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005148879095566737"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011169912165829119"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002975557170232136"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "60080028386.70366"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "480640227093.6293"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6564688416379333"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.510971747"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "470"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011154764378324467"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524288288"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^8 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "256"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "447"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011246830559284123"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004922192808378086"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011200362225240248"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002646502354772987"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59916690773.418724"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "479333526187.3498"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6546841212130542"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.511144538"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "469"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011177327820995468"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5242309390000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 BlockSize=2^10 NumBlocks=2^10",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "BlockSize",
"type": "int64",
"value": "1024"
},
{
"name": "NumBlocks",
"type": "int64",
"value": "1024"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "474"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010598897257383965"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004913062706223566"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001055195342387831"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0020503329663902545"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63598521813.16255"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "508788174505.3004"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6949139184130524"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.511291385"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "498"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010538602162556477"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524838223"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "copy_type_sweep",
"index": 3,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "T",
"type": "type",
"flags": "",
"values": [
{
"input_string": "U8",
"description": "uint8_t",
"is_active": true
},
{
"input_string": "U16",
"description": "uint16_t",
"is_active": true
},
{
"input_string": "U32",
"description": "uint32_t",
"is_active": true
},
{
"input_string": "U64",
"description": "uint64_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
],
"states": [
{
"name": "Device=0 T=U8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U8"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2992"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022994002396390365"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.024778400174351137"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002293938610882044"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.02459574709695746"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "117019459338.00893"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "234038918676.01785"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.2688866253171161"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "6.944152369"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2993"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0022801307408338873"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "6.853598372"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=U16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 1,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U16"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "672"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014471324925595243"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006960808950083016"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001441753045966228"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005894275617037584"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "93093424269.51526"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "372373697078.06104"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.4278190453562282"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.98617708"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "673"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001438309451800399"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.9712327470000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=U32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 2,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "848"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001072196766509434"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.011171612715506738"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010668103765204251"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009987563873112983"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "62906084789.7697"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "503248678318.1576"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5781809263765597"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.9268544190000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "849"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001064370134974818"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.9073867080000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=U64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 3,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1568"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009390030325255086"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008249904375540816"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009335942644701952"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005885112868111755"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35941129114.62859"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "575058065834.0575"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6606825204894962"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.505405182"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1569"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009319373003763345"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.4727063310000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 4,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "752"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010726744441489362"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01103458165791857"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010672343821918702"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009783271621840188"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "62881092588.27738"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "503048740706.21906"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5779512186422553"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.822184149"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "753"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001063365562503555"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.803427655"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 T=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 5,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "544"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009393335257352945"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007706533640467741"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000933937587282237"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005096920917422438"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35927916872.52203"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "574846669960.3525"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6604396483919491"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5221495780000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "565"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0009305206028761061"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525756552"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U8"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2784"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0027056495269396513"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009323042699490573"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0027009093115727078"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009134230706566165"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "99387067477.54266"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "198774134955.08533"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.2714900226112944"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "7.588833747000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2785"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002695659536947251"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "7.532905504"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 1,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U16"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "330"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015226199969696965"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005622755807814305"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015179373560529775"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004685161525974869"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "88421124537.70831"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "353684498150.83325"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.4830699548607316"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.508664443"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "349"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0015155031045733347"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528926318"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 2,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "528"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011321445473484848"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007683380682909642"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011274604856064824"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006460085786164455"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59522142777.2707"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "476177142218.1656"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6503730635628354"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.607756878"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "529"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011264972348745013"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5965697010000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=U64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 3,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "U64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "478"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010525728723849374"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005381193387193611"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00104785640469156"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0028679450954259256"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32021975386.863106"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512351606189.8097"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6997809306569734"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.512086032"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "500"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010454827880859374"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5227592090000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 4,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "528"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011322722803030294"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0076927816018557355"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011275246077866272"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006425647618712464"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "59518757760.62857"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "476150062085.02856"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.650336076929945"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.608051446"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "529"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011258555051284391"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.596245555"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 T=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 5,
"axis_values": [
{
"name": "T",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "478"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010523593117154819"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005286902872056256"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001047618542256216"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.002706934586546566"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32029245996.099976"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512467935937.5996"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6999398163483386"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.51215675"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "503"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0010453338319691226"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525822574"
}
]
}
],
"is_skipped": false
}
]
},
{
"name": "copy_type_conversion_sweep",
"index": 4,
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"devices": [
0,
1
],
"axes": [
{
"name": "In",
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
},
{
"name": "Out",
"type": "type",
"flags": "",
"values": [
{
"input_string": "I8",
"description": "int8_t",
"is_active": true
},
{
"input_string": "I16",
"description": "int16_t",
"is_active": true
},
{
"input_string": "I32",
"description": "int32_t",
"is_active": true
},
{
"input_string": "F32",
"description": "float",
"is_active": true
},
{
"input_string": "I64",
"description": "int64_t",
"is_active": true
},
{
"input_string": "F64",
"description": "double",
"is_active": true
}
]
}
],
"states": [
{
"name": "Device=0 In=I8 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 0,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I8 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 1,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1008"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006586167946428575"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.05914716011832632"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006528769518056576"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.057982657554439924"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "102789451847.54562"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "308368355542.63684"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.35428349671718384"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6850768"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1009"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006166892571539062"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.623953807"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 2,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "684"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007371795058479537"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008847285785468822"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000731761917384746"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004879904384809398"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "91708604131.57506"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "458543020657.87524"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5268187277778897"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5183809100000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "719"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000729479623935153"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5245085300000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 3,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "680"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007416148632352943"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00847915084559806"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000736235386308502"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0043064135466205815"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "91151369858.06279"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "455756849290.3139"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5236177036883202"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5183531140000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "718"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007336860167946988"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.526798324"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 4,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "528"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0012050906723484857"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.009907122479821073"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011996847262436706"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008827766250664237"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "55938750016.53507"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "503448750148.8156"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5784107883143562"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.647279072"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "529"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011969163755838723"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.634178896"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I8 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 5,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1040"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011804124500000013"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00957111143535521"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011749697549985022"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00836195198403357"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "57115396983.206215"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "514038572848.85596"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5905774044678952"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.249563157"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1041"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0011735446663800626"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "1.2279065690000002"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 6,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I16 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 7,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I16 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 8,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1632"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00043062034803921626"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01672483523731806"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00042520409690983404"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.010836224516018789"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78913708131.82764"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "473482248790.9659"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5439823630410913"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.737060063"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1633"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004232788786191731"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.695498727"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 9,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1184"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00043284459121621524"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.015105337896417907"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004274506211733894"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008262516586090977"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78498966518.9634"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "470993799113.7804"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5411233905259426"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5372377770000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1232"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004249965618183087"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523606656"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 10,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "768"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006572663450520837"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.011014295443548292"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006518266665128367"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007223480904816997"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "51477538007.934814"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "514775380079.34814"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5914239201279275"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520738605"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "796"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006487323243414338"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.516403752"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I16 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 11,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "880"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006565674102272736"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01097889608017816"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006511251280253577"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007150441437138621"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "51533001194.03968"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "515330011940.39685"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5920611350418162"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.596114039"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "881"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006487893111724723"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.572324006"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 12,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 13,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 14,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 15,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1904"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026858391123949583"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.024532996731547897"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002631697807648852"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0133776106644627"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63750541385.25386"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "510004331082.0309"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5859424759674068"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.551536212"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1969"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00025964095085147915"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5112442500000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 16,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00038312173493975965"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0162544382499927"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00037769761349422534"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00766160749599669"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44419703489.221306"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "533036441870.65564"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6124040003109554"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.536583359"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1388"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003753291852879593"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520968135"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 17,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00038320030346385516"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.016795599989722854"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00037778021639819085"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008713885990809477"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44409990972.94271"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "532919891675.31256"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6122700961343205"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.536768873"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1377"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003755766647660222"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.517180861"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 18,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 19,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 20,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1904"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026906845745798324"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.023966161873692115"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026363512487033393"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012223432341603665"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "63638014882.31772"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "509104119058.54175"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5849082250213026"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.552411471"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1961"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002602678033419253"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5103970170000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 21,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=F32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 22,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00038308867695783106"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.016918671591625058"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003776144570480286"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008723758091138187"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44429485383.46378"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "533153824601.56537"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6125388609852543"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5366656240000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1396"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003754830346749642"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5241867800000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 23,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1328"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00038299202560240965"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0167760658438423"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0003775633729949433"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008684523141297206"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44435496660.91339"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "533225959930.9607"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6126217370530339"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5364044290000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1404"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00037541050924534816"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5270894850000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=I64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 24,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 25,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 26,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 27,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=I64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 28,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=0 In=I64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 29,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2112"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002423827249053035"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.02416424179820878"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00023696183533210337"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007872204592971034"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35400671117.538055"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "566410737880.6089"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6507476308370966"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5565369530000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2205"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00023414492098922904"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5163003700000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 30,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 31,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 32,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 33,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=0 In=F64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 34,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2112"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00024286170075757575"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.02414779678250403"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00023745110798909405"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008091753071026355"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "35327727341.60197"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "565243637465.6315"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6494067526029773"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5576062110000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2233"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00023462851593113247"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523938348"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=0 In=F64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 0,
"type_config_index": 35,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I8 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 0,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I8 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 1,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1024"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000683441244140624"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.03316062878230732"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0006786162495845936"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.03212477441508221"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "98890741329.99298"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "296672223989.97894"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.4052013548814179"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.719493135"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1025"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000659710607877592"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.678423381"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 2,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "592"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008640211064189187"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00944028164858259"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008593624308705327"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007736902908752538"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78091456630.25882"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "390457283151.29407"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5332950217866232"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5227107280000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "614"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008578363971523819"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5267261910000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 3,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "592"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0008612816165540544"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00975919715067052"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000856635513035832"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008096027798987914"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "78340044253.09521"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "391700221265.4761"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5349926536077853"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.521050054"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "599"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000855541095510747"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5124821390000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 4,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "672"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014581301889880955"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006071682334960142"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014534626205762236"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005154971568436681"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "46171716458.311646"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "415545448124.8048"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5675609813767548"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.9926510430000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "673"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.001450536531289656"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.9791953990000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I8 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 5,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I8"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "536870912"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "352"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014604223210227273"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.006352174295896549"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014556942754848428"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005460161744719934"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "46100932819.597916"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "414908395376.3812"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5666908809227235"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520818273"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "361"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0014522860624783588"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524288878"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 6,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I16 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 7,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I16 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 8,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1104"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00046094446557971044"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012758359369013577"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004563006377252548"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007641740734756292"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "73535799045.2856"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "441214794271.71356"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6026207308125459"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.529887408"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1140"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.000454689802203262"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.518361091"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 9,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1104"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004598212318840582"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012784453789403875"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004551693620025247"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.007660841909416756"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "73718564563.25784"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "442311387379.54706"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6041184814515229"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5287007920000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1154"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004530425443599707"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5228252680000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 10,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "672"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007534447321428569"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008541470607558692"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007488120960160388"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005872606532245015"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44810216312.63993"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "448102163126.3993"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6120276485008732"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5189650410000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "701"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007457322407721113"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522771435"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I16 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 11,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I16"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "33554432"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "268435456"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "672"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007513076056547618"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.008183588591017211"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007466521440517336"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005268370256387482"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "44939845505.452805"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "449398455054.528"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6137981521177448"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.517594289"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "705"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0007440871218417554"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.524597968"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 12,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 13,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 14,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 15,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1840"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002776829885869563"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.017861418449176162"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002730688870924969"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.005670388220307151"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "61439500408.250595"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "491516003266.00476"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6713232125027382"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.546509219"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1927"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002714873839983621"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523169341"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 16,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1196"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004229804180602015"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012054556683918517"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004183483349290177"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004710258623264892"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40103460679.117165"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481241528149.406"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6572901116551109"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528636114"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1252"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004159514698357628"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520784637"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 17,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1195"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004231393305439326"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012074190328924192"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004185113171653266"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004772051537598408"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40087843056.75637"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481054116681.0764"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6570341410089002"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528483731"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1258"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00041619300539050074"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.523585816"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F32 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 18,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F32 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 19,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F32 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 20,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1808"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00028167357632743345"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.021202385815789038"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00027701768152151984"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012884867104306086"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "60563700872.27331"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "484509606978.18646"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6617537245659234"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5443872160000001"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1858"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002751795970970129"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.5112970170000001"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F32 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 21,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=F32 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 22,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1195"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00042308463263598364"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012089671437059933"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004184455500237615"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004773399327485907"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40094143668.26772"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481129724019.21265"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.657137407150367"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528290021"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1264"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004161183321023289"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.525988793"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F32 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 23,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F32"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "16777216"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "134217728"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1195"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004230846794979085"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.012109978274913669"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0004184373557567601"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004746400070144573"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "40094928832.67498"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "481139145992.0998"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6571502758851887"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.528360885"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1255"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00041616955031436757"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.522306598"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=I64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 24,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 25,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 26,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 27,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=I64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 28,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
},
{
"name": "Device=1 In=I64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 29,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "I64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1909"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002665689759036145"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0182363750388233"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026191822334444936"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.003986137271503454"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32027584384.489807"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512441350151.8369"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6999035049058088"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.545965233"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2012"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002600545409185512"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.52324384"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F64 Out=I8",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 30,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I8"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=I16",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 31,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I16"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=I32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 32,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=F32",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 33,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F32"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Narrowing conversion: sizeof(InputType) > sizeof(OutputType)."
},
{
"name": "Device=1 In=F64 Out=I64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 34,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "I64"
}
],
"summaries": [
{
"tag": "nv/element_count/Items",
"name": "Items",
"description": "Number of elements: Items",
"data": [
{
"name": "value",
"type": "int64",
"value": "8388608"
}
]
},
{
"tag": "nv/gmem/reads/InSize",
"name": "InSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/gmem/writes/OutSize",
"name": "OutSize",
"hint": "bytes",
"data": [
{
"name": "value",
"type": "int64",
"value": "67108864"
}
]
},
{
"tag": "nv/cold/sample_size",
"name": "Samples",
"description": "Number of isolated kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "1909"
}
]
},
{
"tag": "nv/cold/time/cpu/mean",
"name": "CPU Time",
"description": "Mean isolated kernel execution time (measured on host CPU)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.00026655877475117843"
}
]
},
{
"tag": "nv/cold/time/cpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated CPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.01817264133840171"
}
]
},
{
"tag": "nv/cold/time/gpu/mean",
"name": "GPU Time",
"description": "Mean isolated kernel execution time (measured with CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002619331087848795"
}
]
},
{
"tag": "nv/cold/time/gpu/stdev/relative",
"name": "Noise",
"description": "Relative standard deviation of isolated GPU times",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.004144721519339008"
}
]
},
{
"tag": "nv/cold/bw/item_rate",
"name": "Elem/s",
"description": "Number of input elements processed per second",
"hint": "item_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "32025764283.542324"
}
]
},
{
"tag": "nv/cold/bw/global/bytes_per_second",
"name": "GlobalMem BW",
"description": "Number of bytes read/written per second to the CUDA device's global memory",
"hint": "byte_rate",
"data": [
{
"name": "value",
"type": "float64",
"value": "512412228536.6772"
}
]
},
{
"tag": "nv/cold/bw/global/utilization",
"name": "BWUtil",
"description": "Global device memory utilization as a percentage of the device's peak bandwidth",
"hint": "percentage",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.6998637299725158"
}
]
},
{
"tag": "nv/cold/walltime",
"name": "Walltime",
"description": "Walltime used for isolated measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.546077204"
}
]
},
{
"tag": "nv/batch/sample_size",
"name": "Samples",
"description": "Number of batch kernel executions",
"hint": "sample_size",
"data": [
{
"name": "value",
"type": "int64",
"value": "2003"
}
]
},
{
"tag": "nv/batch/time/gpu/mean",
"name": "Batch GPU",
"description": "Mean batch kernel execution time (measured by CUDA events)",
"hint": "duration",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.0002600450215789597"
}
]
},
{
"tag": "nv/batch/walltime",
"name": "Walltime",
"description": "Walltime used for batch measurements",
"hint": "duration",
"hide": "Hidden by default.",
"data": [
{
"name": "value",
"type": "float64",
"value": "0.520883479"
}
]
}
],
"is_skipped": false
},
{
"name": "Device=1 In=F64 Out=F64",
"min_samples": 10,
"min_time": 0.5,
"max_noise": 0.005,
"skip_time": -1.0,
"timeout": 15.0,
"device": 1,
"type_config_index": 35,
"axis_values": [
{
"name": "In",
"type": "string",
"value": "F64"
},
{
"name": "Out",
"type": "string",
"value": "F64"
}
],
"summaries": null,
"is_skipped": true,
"skip_reason": "Not a conversion: InputType == OutputType."
}
]
}
]
}